mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-05-28 04:04:39 -04:00
kubelet: migrate container manager to contextual logging
This commit is contained in:
parent
5a26d2af61
commit
9521398859
9 changed files with 92 additions and 65 deletions
|
|
@ -224,25 +224,25 @@ func int64Slice(in []int) []int64 {
|
|||
return out
|
||||
}
|
||||
|
||||
func podHasExclusiveCPUs(cr cpuAllocationReader, pod *v1.Pod) bool {
|
||||
func podHasExclusiveCPUs(logger klog.Logger, cr cpuAllocationReader, pod *v1.Pod) bool {
|
||||
for _, container := range pod.Spec.InitContainers {
|
||||
if containerHasExclusiveCPUs(cr, pod, &container) {
|
||||
if containerHasExclusiveCPUs(logger, cr, pod, &container) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
for _, container := range pod.Spec.Containers {
|
||||
if containerHasExclusiveCPUs(cr, pod, &container) {
|
||||
if containerHasExclusiveCPUs(logger, cr, pod, &container) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
klog.V(4).InfoS("Pod contains no container with pinned cpus", "podName", pod.Name)
|
||||
logger.V(4).Info("Pod contains no container with pinned cpus", "podName", pod.Name)
|
||||
return false
|
||||
}
|
||||
|
||||
func containerHasExclusiveCPUs(cr cpuAllocationReader, pod *v1.Pod, container *v1.Container) bool {
|
||||
func containerHasExclusiveCPUs(logger klog.Logger, cr cpuAllocationReader, pod *v1.Pod, container *v1.Container) bool {
|
||||
exclusiveCPUs := cr.GetExclusiveCPUs(string(pod.UID), container.Name)
|
||||
if !exclusiveCPUs.IsEmpty() {
|
||||
klog.V(4).InfoS("Container has pinned cpus", "podName", pod.Name, "containerName", container.Name)
|
||||
logger.V(4).Info("Container has pinned cpus", "podName", pod.Name, "containerName", container.Name)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ var _ ContainerManager = &containerManagerImpl{}
|
|||
// checks if the required cgroups subsystems are mounted.
|
||||
// As of now, only 'cpu' and 'memory' are required.
|
||||
// cpu quota is a soft requirement.
|
||||
func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
|
||||
func validateSystemRequirements(logger klog.Logger, mountUtil mount.Interface) (features, error) {
|
||||
const (
|
||||
cgroupMountType = "cgroup"
|
||||
localErr = "system validation failed"
|
||||
|
|
@ -190,11 +190,11 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
|
|||
// CPU cgroup is required and so it expected to be mounted at this point.
|
||||
periodExists, err := utilpath.Exists(utilpath.CheckFollowSymlink, path.Join(cpuMountPoint, "cpu.cfs_period_us"))
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to detect if CPU cgroup cpu.cfs_period_us is available")
|
||||
logger.Error(err, "Failed to detect if CPU cgroup cpu.cfs_period_us is available")
|
||||
}
|
||||
quotaExists, err := utilpath.Exists(utilpath.CheckFollowSymlink, path.Join(cpuMountPoint, "cpu.cfs_quota_us"))
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to detect if CPU cgroup cpu.cfs_quota_us is available")
|
||||
logger.Error(err, "Failed to detect if CPU cgroup cpu.cfs_quota_us is available")
|
||||
}
|
||||
if quotaExists && periodExists {
|
||||
f.cpuHardcapping = true
|
||||
|
|
@ -415,11 +415,17 @@ func (cm *containerManagerImpl) NewPodContainerManager() PodContainerManager {
|
|||
}
|
||||
|
||||
func (cm *containerManagerImpl) PodHasExclusiveCPUs(pod *v1.Pod) bool {
|
||||
return podHasExclusiveCPUs(cm.cpuManager, pod)
|
||||
// Use klog.TODO() because we currently do not have a proper logger to pass in.
|
||||
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
|
||||
logger := klog.TODO()
|
||||
return podHasExclusiveCPUs(logger, cm.cpuManager, pod)
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) ContainerHasExclusiveCPUs(pod *v1.Pod, container *v1.Container) bool {
|
||||
return containerHasExclusiveCPUs(cm.cpuManager, pod, container)
|
||||
// Use klog.TODO() because we currently do not have a proper logger to pass in.
|
||||
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
|
||||
logger := klog.TODO()
|
||||
return containerHasExclusiveCPUs(logger, cm.cpuManager, pod, container)
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) InternalContainerLifecycle() InternalContainerLifecycle {
|
||||
|
|
@ -450,7 +456,7 @@ const (
|
|||
|
||||
// setupKernelTunables validates kernel tunable flags are set as expected
|
||||
// depending upon the specified option, it will either warn, error, or modify the kernel tunable flags
|
||||
func setupKernelTunables(option KernelTunableBehavior) error {
|
||||
func setupKernelTunables(logger klog.Logger, option KernelTunableBehavior) error {
|
||||
desiredState := map[string]int{
|
||||
utilsysctl.VMOvercommitMemory: utilsysctl.VMOvercommitMemoryAlways,
|
||||
utilsysctl.VMPanicOnOOM: utilsysctl.VMPanicOnOOMInvokeOOMKiller,
|
||||
|
|
@ -477,17 +483,17 @@ func setupKernelTunables(option KernelTunableBehavior) error {
|
|||
case KernelTunableError:
|
||||
errList = append(errList, fmt.Errorf("invalid kernel flag: %v, expected value: %v, actual value: %v", flag, expectedValue, val))
|
||||
case KernelTunableWarn:
|
||||
klog.V(2).InfoS("Invalid kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val)
|
||||
logger.V(2).Info("Invalid kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val)
|
||||
case KernelTunableModify:
|
||||
klog.V(2).InfoS("Updating kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val)
|
||||
logger.V(2).Info("Updating kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val)
|
||||
err = sysctl.SetSysctl(flag, expectedValue)
|
||||
if err != nil {
|
||||
if inuserns.RunningInUserNS() {
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletInUserNamespace) {
|
||||
klog.V(2).InfoS("Updating kernel flag failed (running in UserNS, ignoring)", "flag", flag, "err", err)
|
||||
logger.V(2).Info("Updating kernel flag failed (running in UserNS, ignoring)", "flag", flag, "err", err)
|
||||
continue
|
||||
}
|
||||
klog.ErrorS(err, "Updating kernel flag failed (Hint: enable KubeletInUserNamespace feature flag to ignore the error)", "flag", flag)
|
||||
logger.Error(err, "Updating kernel flag failed (Hint: enable KubeletInUserNamespace feature flag to ignore the error)", "flag", flag)
|
||||
}
|
||||
errList = append(errList, err)
|
||||
}
|
||||
|
|
@ -499,7 +505,7 @@ func setupKernelTunables(option KernelTunableBehavior) error {
|
|||
func (cm *containerManagerImpl) setupNode(ctx context.Context, activePods ActivePodsFunc) error {
|
||||
logger := klog.FromContext(ctx)
|
||||
|
||||
f, err := validateSystemRequirements(cm.mountUtil)
|
||||
f, err := validateSystemRequirements(logger, cm.mountUtil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
@ -510,7 +516,7 @@ func (cm *containerManagerImpl) setupNode(ctx context.Context, activePods Active
|
|||
if cm.GetNodeConfig().ProtectKernelDefaults {
|
||||
b = KernelTunableError
|
||||
}
|
||||
if err := setupKernelTunables(b); err != nil {
|
||||
if err := setupKernelTunables(logger, b); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
@ -541,7 +547,7 @@ func (cm *containerManagerImpl) setupNode(ctx context.Context, activePods Active
|
|||
return err
|
||||
}
|
||||
cont.ensureStateFunc = func(manager cgroups.Manager) error {
|
||||
return ensureSystemCgroups("/", manager)
|
||||
return ensureSystemCgroups(logger, "/", manager)
|
||||
}
|
||||
systemContainers = append(systemContainers, cont)
|
||||
}
|
||||
|
|
@ -769,23 +775,23 @@ func (cm *containerManagerImpl) SystemCgroupsLimit() v1.ResourceList {
|
|||
}
|
||||
}
|
||||
|
||||
func isProcessRunningInHost(pid int) (bool, error) {
|
||||
func isProcessRunningInHost(logger klog.Logger, pid int) (bool, error) {
|
||||
// Get init pid namespace.
|
||||
initPidNs, err := os.Readlink("/proc/1/ns/pid")
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to find pid namespace of init process")
|
||||
}
|
||||
klog.V(10).InfoS("Found init PID namespace", "namespace", initPidNs)
|
||||
logger.V(10).Info("Found init PID namespace", "namespace", initPidNs)
|
||||
processPidNs, err := os.Readlink(fmt.Sprintf("/proc/%d/ns/pid", pid))
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to find pid namespace of process %q", pid)
|
||||
}
|
||||
klog.V(10).InfoS("Process info", "pid", pid, "namespace", processPidNs)
|
||||
logger.V(10).Info("Process info", "pid", pid, "namespace", processPidNs)
|
||||
return initPidNs == processPidNs, nil
|
||||
}
|
||||
|
||||
func ensureProcessInContainerWithOOMScore(logger klog.Logger, pid int, oomScoreAdj int, manager cgroups.Manager) error {
|
||||
if runningInHost, err := isProcessRunningInHost(pid); err != nil {
|
||||
if runningInHost, err := isProcessRunningInHost(logger, pid); err != nil {
|
||||
// Err on the side of caution. Avoid moving the docker daemon unless we are able to identify its context.
|
||||
return err
|
||||
} else if !runningInHost {
|
||||
|
|
@ -885,7 +891,7 @@ func getContainer(logger klog.Logger, pid int) (string, error) {
|
|||
//
|
||||
// The reason of leaving kernel threads at root cgroup is that we don't want to tie the
|
||||
// execution of these threads with to-be defined /system quota and create priority inversions.
|
||||
func ensureSystemCgroups(rootCgroupPath string, manager cgroups.Manager) error {
|
||||
func ensureSystemCgroups(logger klog.Logger, rootCgroupPath string, manager cgroups.Manager) error {
|
||||
// Move non-kernel PIDs to the system container.
|
||||
// Only keep errors on latest attempt.
|
||||
var finalErr error
|
||||
|
|
@ -911,7 +917,7 @@ func ensureSystemCgroups(rootCgroupPath string, manager cgroups.Manager) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
klog.V(3).InfoS("Moving non-kernel processes", "pids", pids)
|
||||
logger.V(3).Info("Moving non-kernel processes", "pids", pids)
|
||||
for _, pid := range pids {
|
||||
err := manager.Apply(pid)
|
||||
if err != nil {
|
||||
|
|
@ -940,6 +946,9 @@ func isKernelPid(pid int) bool {
|
|||
// GetCapacity returns node capacity data for "cpu", "memory", "ephemeral-storage", and "huge-pages*"
|
||||
// At present this method is only invoked when introspecting ephemeral storage
|
||||
func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool) v1.ResourceList {
|
||||
// Use klog.TODO() because we currently do not have a proper logger to pass in.
|
||||
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
|
||||
logger := klog.TODO()
|
||||
if localStorageCapacityIsolation {
|
||||
// We store allocatable ephemeral-storage in the capacity property once we Start() the container manager
|
||||
if _, ok := cm.capacity[v1.ResourceEphemeralStorage]; !ok {
|
||||
|
|
@ -947,7 +956,7 @@ func (cm *containerManagerImpl) GetCapacity(localStorageCapacityIsolation bool)
|
|||
if cm.cadvisorInterface != nil {
|
||||
rootfs, err := cm.cadvisorInterface.RootFsInfo()
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Unable to get rootfs data from cAdvisor interface")
|
||||
logger.Error(err, "Unable to get rootfs data from cAdvisor interface")
|
||||
// If the rootfsinfo retrieval from cAdvisor fails for any reason, fallback to returning the capacity property with no ephemeral storage data
|
||||
return cm.capacity
|
||||
}
|
||||
|
|
@ -1012,6 +1021,9 @@ func (cm *containerManagerImpl) GetAllocatableMemory() []*podresourcesapi.Contai
|
|||
}
|
||||
|
||||
func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.Container) []*podresourcesapi.DynamicResource {
|
||||
// Use klog.TODO() because we currently do not have a proper logger to pass in.
|
||||
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
|
||||
logger := klog.TODO()
|
||||
if !utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DynamicResourceAllocation) {
|
||||
return []*podresourcesapi.DynamicResource{}
|
||||
}
|
||||
|
|
@ -1019,7 +1031,7 @@ func (cm *containerManagerImpl) GetDynamicResources(pod *v1.Pod, container *v1.C
|
|||
var containerDynamicResources []*podresourcesapi.DynamicResource
|
||||
containerClaimInfos, err := cm.draManager.GetContainerClaimInfos(pod, container)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Unable to get container claim info state")
|
||||
logger.Error(err, "Unable to get container claim info state")
|
||||
return []*podresourcesapi.DynamicResource{}
|
||||
}
|
||||
for _, containerClaimInfo := range containerClaimInfos {
|
||||
|
|
|
|||
|
|
@ -66,7 +66,8 @@ func fakeContainerMgrMountInt() mount.Interface {
|
|||
}
|
||||
|
||||
func TestCgroupMountValidationSuccess(t *testing.T) {
|
||||
f, err := validateSystemRequirements(fakeContainerMgrMountInt())
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
f, err := validateSystemRequirements(logger, fakeContainerMgrMountInt())
|
||||
assert.NoError(t, err)
|
||||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
assert.True(t, f.cpuHardcapping, "cpu hardcapping is expected to be enabled")
|
||||
|
|
@ -79,6 +80,7 @@ func TestCgroupMountValidationMemoryMissing(t *testing.T) {
|
|||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("skipping cgroup v1 test on a cgroup v2 system")
|
||||
}
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
mountInt := mount.NewFakeMounter(
|
||||
[]mount.MountPoint{
|
||||
{
|
||||
|
|
@ -97,7 +99,7 @@ func TestCgroupMountValidationMemoryMissing(t *testing.T) {
|
|||
Opts: []string{"rw", "relatime", "cpuacct"},
|
||||
},
|
||||
})
|
||||
_, err := validateSystemRequirements(mountInt)
|
||||
_, err := validateSystemRequirements(logger, mountInt)
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
|
|
@ -105,6 +107,7 @@ func TestCgroupMountValidationMultipleSubsystem(t *testing.T) {
|
|||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("skipping cgroup v1 test on a cgroup v2 system")
|
||||
}
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
mountInt := mount.NewFakeMounter(
|
||||
[]mount.MountPoint{
|
||||
{
|
||||
|
|
@ -123,7 +126,7 @@ func TestCgroupMountValidationMultipleSubsystem(t *testing.T) {
|
|||
Opts: []string{"rw", "relatime", "cpuacct"},
|
||||
},
|
||||
})
|
||||
_, err := validateSystemRequirements(mountInt)
|
||||
_, err := validateSystemRequirements(logger, mountInt)
|
||||
assert.NoError(t, err)
|
||||
}
|
||||
|
||||
|
|
@ -144,6 +147,7 @@ func TestSoftRequirementsValidationSuccess(t *testing.T) {
|
|||
if cgroups.IsCgroup2UnifiedMode() {
|
||||
t.Skip("skipping cgroup v1 test on a cgroup v2 system")
|
||||
}
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
req := require.New(t)
|
||||
tempDir, err := os.MkdirTemp("", "")
|
||||
req.NoError(err)
|
||||
|
|
@ -169,7 +173,7 @@ func TestSoftRequirementsValidationSuccess(t *testing.T) {
|
|||
Opts: []string{"rw", "relatime", "cpuacct", "memory"},
|
||||
},
|
||||
})
|
||||
f, err := validateSystemRequirements(mountInt)
|
||||
f, err := validateSystemRequirements(logger, mountInt)
|
||||
assert.NoError(t, err)
|
||||
assert.True(t, f.cpuHardcapping, "cpu hardcapping is expected to be enabled")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -376,9 +376,15 @@ func (cm *containerManagerImpl) PodMightNeedToUnprepareResources(UID types.UID)
|
|||
}
|
||||
|
||||
func (cm *containerManagerImpl) PodHasExclusiveCPUs(pod *v1.Pod) bool {
|
||||
return podHasExclusiveCPUs(cm.cpuManager, pod)
|
||||
// Use klog.TODO() because we currently do not have a proper logger to pass in.
|
||||
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
|
||||
logger := klog.TODO()
|
||||
return podHasExclusiveCPUs(logger, cm.cpuManager, pod)
|
||||
}
|
||||
|
||||
func (cm *containerManagerImpl) ContainerHasExclusiveCPUs(pod *v1.Pod, container *v1.Container) bool {
|
||||
return containerHasExclusiveCPUs(cm.cpuManager, pod, container)
|
||||
// Use klog.TODO() because we currently do not have a proper logger to pass in.
|
||||
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
|
||||
logger := klog.TODO()
|
||||
return containerHasExclusiveCPUs(logger, cm.cpuManager, pod, container)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,9 +55,10 @@ var _ ContainerManager = &FakeContainerManager{}
|
|||
func NewFakeContainerManager() *FakeContainerManager {
|
||||
return &FakeContainerManager{
|
||||
PodContainerManager: NewFakePodContainerManager(),
|
||||
// Using klog.Background() for fake/test implementations where no real context is available
|
||||
cpuManager: cpumanager.NewFakeManager(klog.Background()),
|
||||
memoryManager: memorymanager.NewFakeManager(klog.Background()),
|
||||
// Use klog.TODO() because we currently do not have a proper logger to pass in.
|
||||
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
|
||||
cpuManager: cpumanager.NewFakeManager(klog.TODO()),
|
||||
memoryManager: memorymanager.NewFakeManager(klog.TODO()),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import (
|
|||
"github.com/go-logr/logr"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/klog/v2/ktesting"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/cpumanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/memorymanager"
|
||||
"k8s.io/kubernetes/pkg/kubelet/cm/topologymanager"
|
||||
|
|
@ -89,7 +89,8 @@ func TestPreStartContainer(t *testing.T) {
|
|||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
_ = test.lifecycle.PreStartContainer(klog.Background(), pod, container, "42")
|
||||
logger, _ := ktesting.NewTestContext(t)
|
||||
_ = test.lifecycle.PreStartContainer(logger, pod, container, "42")
|
||||
})
|
||||
|
||||
cManager := test.lifecycle.cpuManager
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ func (m *podContainerManagerImpl) EnsureExists(logger klog.Logger, pod *v1.Pod)
|
|||
if !alreadyExists {
|
||||
enforceCPULimits := m.enforceCPULimits
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.DisableCPUQuotaWithExclusiveCPUs) && m.podContainerManager.PodHasExclusiveCPUs(pod) {
|
||||
klog.V(2).InfoS("Disabled CFS quota", "pod", klog.KObj(pod))
|
||||
logger.V(2).Info("Disabled CFS quota", "pod", klog.KObj(pod))
|
||||
enforceCPULimits = false
|
||||
}
|
||||
enforceMemoryQoS := false
|
||||
|
|
@ -95,7 +95,7 @@ func (m *podContainerManagerImpl) EnsureExists(logger klog.Logger, pod *v1.Pod)
|
|||
containerConfig.ResourceParameters.PidsLimit = &m.podPidsLimit
|
||||
}
|
||||
if enforceMemoryQoS {
|
||||
klog.V(4).InfoS("MemoryQoS config for pod", "pod", klog.KObj(pod), "unified", containerConfig.ResourceParameters.Unified)
|
||||
logger.V(4).Info("MemoryQoS config for pod", "pod", klog.KObj(pod), "unified", containerConfig.ResourceParameters.Unified)
|
||||
}
|
||||
if err := m.cgroupManager.Create(logger, containerConfig); err != nil {
|
||||
return fmt.Errorf("failed to create container for %v : %v", podContainerName, err)
|
||||
|
|
@ -147,14 +147,14 @@ func (m *podContainerManagerImpl) SetPodCgroupConfig(logger klog.Logger, pod *v1
|
|||
}
|
||||
|
||||
// Kill one process ID
|
||||
func (m *podContainerManagerImpl) killOnePid(pid int) error {
|
||||
func (m *podContainerManagerImpl) killOnePid(logger klog.Logger, pid int) error {
|
||||
// os.FindProcess never returns an error on POSIX
|
||||
// https://go-review.googlesource.com/c/go/+/19093
|
||||
p, _ := os.FindProcess(pid)
|
||||
if err := p.Kill(); err != nil {
|
||||
// If the process already exited, that's fine.
|
||||
if errors.Is(err, os.ErrProcessDone) {
|
||||
klog.V(3).InfoS("Process no longer exists", "pid", pid)
|
||||
logger.V(3).Info("Process no longer exists", "pid", pid)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
|
|
@ -177,23 +177,23 @@ func (m *podContainerManagerImpl) tryKillingCgroupProcesses(logger klog.Logger,
|
|||
removed := map[int]bool{}
|
||||
for i := 0; i < 5; i++ {
|
||||
if i != 0 {
|
||||
klog.V(3).InfoS("Attempt failed to kill all unwanted process from cgroup, retrying", "attempt", i, "cgroupName", podCgroup)
|
||||
logger.V(3).Info("Attempt failed to kill all unwanted process from cgroup, retrying", "attempt", i, "cgroupName", podCgroup)
|
||||
}
|
||||
errlist = []error{}
|
||||
for _, pid := range pidsToKill {
|
||||
if _, ok := removed[pid]; ok {
|
||||
continue
|
||||
}
|
||||
klog.V(3).InfoS("Attempting to kill process from cgroup", "pid", pid, "cgroupName", podCgroup)
|
||||
if err := m.killOnePid(pid); err != nil {
|
||||
klog.V(3).InfoS("Failed to kill process from cgroup", "pid", pid, "cgroupName", podCgroup, "err", err)
|
||||
logger.V(3).Info("Attempting to kill process from cgroup", "pid", pid, "cgroupName", podCgroup)
|
||||
if err := m.killOnePid(logger, pid); err != nil {
|
||||
logger.V(3).Info("Failed to kill process from cgroup", "pid", pid, "cgroupName", podCgroup, "err", err)
|
||||
errlist = append(errlist, err)
|
||||
} else {
|
||||
removed[pid] = true
|
||||
}
|
||||
}
|
||||
if len(errlist) == 0 {
|
||||
klog.V(3).InfoS("Successfully killed all unwanted processes from cgroup", "cgroupName", podCgroup)
|
||||
logger.V(3).Info("Successfully killed all unwanted processes from cgroup", "cgroupName", podCgroup)
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
|
@ -253,6 +253,9 @@ func (m *podContainerManagerImpl) IsPodCgroup(cgroupfs string) (bool, types.UID)
|
|||
// GetAllPodsFromCgroups scans through all the subsystems of pod cgroups
|
||||
// Get list of pods whose cgroup still exist on the cgroup mounts
|
||||
func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupName, error) {
|
||||
// Use klog.TODO() because we currently do not have a proper logger to pass in.
|
||||
// Replace this with an appropriate logger when refactoring this function to accept a logger parameter.
|
||||
logger := klog.TODO()
|
||||
// Map for storing all the found pods on the disk
|
||||
foundPods := make(map[types.UID]CgroupName)
|
||||
qosContainersList := [3]CgroupName{m.qosContainersInfo.BestEffort, m.qosContainersInfo.Burstable, m.qosContainersInfo.Guaranteed}
|
||||
|
|
@ -293,7 +296,7 @@ func (m *podContainerManagerImpl) GetAllPodsFromCgroups() (map[types.UID]CgroupN
|
|||
parts := strings.Split(basePath, podCgroupNamePrefix)
|
||||
// the uid is missing, so we log the unexpected cgroup not of form pod<uid>
|
||||
if len(parts) != 2 {
|
||||
klog.InfoS("Pod cgroup manager ignored unexpected cgroup because it is not a pod", "path", cgroupfsPath)
|
||||
logger.Info("Pod cgroup manager ignored unexpected cgroup because it is not a pod", "path", cgroupfsPath)
|
||||
continue
|
||||
}
|
||||
podUID := parts[1]
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ func (m *qosContainerManagerImpl) Start(ctx context.Context, getNodeAllocatable
|
|||
go wait.Until(func() {
|
||||
err := m.UpdateCgroups(logger)
|
||||
if err != nil {
|
||||
klog.InfoS("Failed to reserve QoS requests", "err", err)
|
||||
logger.Info("Failed to reserve QoS requests", "err", err)
|
||||
}
|
||||
}, periodicQOSCgroupUpdateInterval, wait.NeverStop)
|
||||
|
||||
|
|
@ -233,23 +233,23 @@ func (m *qosContainerManagerImpl) getQoSMemoryRequests() map[v1.PodQOSClass]int6
|
|||
// setMemoryReserve sums the memory limits of all pods in a QOS class,
|
||||
// calculates QOS class memory limits, and set those limits in the
|
||||
// CgroupConfig for each QOS class.
|
||||
func (m *qosContainerManagerImpl) setMemoryReserve(configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
|
||||
func (m *qosContainerManagerImpl) setMemoryReserve(logger klog.Logger, configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
|
||||
qosMemoryRequests := m.getQoSMemoryRequests()
|
||||
|
||||
resources := m.getNodeAllocatable()
|
||||
allocatableResource, ok := resources[v1.ResourceMemory]
|
||||
if !ok {
|
||||
klog.V(2).InfoS("Allocatable memory value could not be determined, not setting QoS memory limits")
|
||||
logger.V(2).Info("Allocatable memory value could not be determined, not setting QoS memory limits")
|
||||
return
|
||||
}
|
||||
allocatable := allocatableResource.Value()
|
||||
if allocatable == 0 {
|
||||
klog.V(2).InfoS("Allocatable memory reported as 0, might be in standalone mode, not setting QoS memory limits")
|
||||
logger.V(2).Info("Allocatable memory reported as 0, might be in standalone mode, not setting QoS memory limits")
|
||||
return
|
||||
}
|
||||
|
||||
for qos, limits := range qosMemoryRequests {
|
||||
klog.V(2).InfoS("QoS pod memory limit", "qos", qos, "limits", limits, "percentReserve", percentReserve)
|
||||
logger.V(2).Info("QoS pod memory limit", "qos", qos, "limits", limits, "percentReserve", percentReserve)
|
||||
}
|
||||
|
||||
// Calculate QOS memory limits
|
||||
|
|
@ -262,14 +262,14 @@ func (m *qosContainerManagerImpl) setMemoryReserve(configs map[v1.PodQOSClass]*C
|
|||
// retrySetMemoryReserve checks for any QoS cgroups over the limit
|
||||
// that was attempted to be set in the first Update() and adjusts
|
||||
// their memory limit to the usage to prevent further growth.
|
||||
func (m *qosContainerManagerImpl) retrySetMemoryReserve(configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
|
||||
func (m *qosContainerManagerImpl) retrySetMemoryReserve(logger klog.Logger, configs map[v1.PodQOSClass]*CgroupConfig, percentReserve int64) {
|
||||
// Unreclaimable memory usage may already exceeded the desired limit
|
||||
// Attempt to set the limit near the current usage to put pressure
|
||||
// on the cgroup and prevent further growth.
|
||||
for qos, config := range configs {
|
||||
usage, err := m.cgroupManager.MemoryUsage(config.Name)
|
||||
if err != nil {
|
||||
klog.V(2).InfoS("Failed to get resource stats", "err", err)
|
||||
logger.V(2).Info("Failed to get resource stats", "err", err)
|
||||
return
|
||||
}
|
||||
|
||||
|
|
@ -287,7 +287,7 @@ func (m *qosContainerManagerImpl) retrySetMemoryReserve(configs map[v1.PodQOSCla
|
|||
|
||||
// setMemoryQoS sums the memory requests of all pods in the Burstable class,
|
||||
// and set the sum memory as the memory.min in the Unified field of CgroupConfig.
|
||||
func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*CgroupConfig) {
|
||||
func (m *qosContainerManagerImpl) setMemoryQoS(logger klog.Logger, configs map[v1.PodQOSClass]*CgroupConfig) {
|
||||
qosMemoryRequests := m.getQoSMemoryRequests()
|
||||
|
||||
// Calculate the memory.min:
|
||||
|
|
@ -301,7 +301,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
|
|||
configs[v1.PodQOSBurstable].ResourceParameters.Unified = make(map[string]string)
|
||||
}
|
||||
configs[v1.PodQOSBurstable].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(burstableMin, 10)
|
||||
klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSBurstable, "memoryMin", burstableMin)
|
||||
logger.V(4).Info("MemoryQoS config for qos", "qos", v1.PodQOSBurstable, "memoryMin", burstableMin)
|
||||
}
|
||||
|
||||
if guaranteedMin > 0 {
|
||||
|
|
@ -309,7 +309,7 @@ func (m *qosContainerManagerImpl) setMemoryQoS(configs map[v1.PodQOSClass]*Cgrou
|
|||
configs[v1.PodQOSGuaranteed].ResourceParameters.Unified = make(map[string]string)
|
||||
}
|
||||
configs[v1.PodQOSGuaranteed].ResourceParameters.Unified[Cgroup2MemoryMin] = strconv.FormatInt(guaranteedMin, 10)
|
||||
klog.V(4).InfoS("MemoryQoS config for qos", "qos", v1.PodQOSGuaranteed, "memoryMin", guaranteedMin)
|
||||
logger.V(4).Info("MemoryQoS config for qos", "qos", v1.PodQOSGuaranteed, "memoryMin", guaranteedMin)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -345,14 +345,14 @@ func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error {
|
|||
// update the qos level cgrougs v2 settings of memory qos if feature enabled
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.MemoryQoS) &&
|
||||
libcontainercgroups.IsCgroup2UnifiedMode() {
|
||||
m.setMemoryQoS(qosConfigs)
|
||||
m.setMemoryQoS(logger, qosConfigs)
|
||||
}
|
||||
|
||||
if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.QOSReserved) {
|
||||
for resource, percentReserve := range m.qosReserved {
|
||||
switch resource {
|
||||
case v1.ResourceMemory:
|
||||
m.setMemoryReserve(qosConfigs, percentReserve)
|
||||
m.setMemoryReserve(logger, qosConfigs, percentReserve)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -364,7 +364,7 @@ func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error {
|
|||
}
|
||||
}
|
||||
if updateSuccess {
|
||||
klog.V(4).InfoS("Updated QoS cgroup configuration")
|
||||
logger.V(4).Info("Updated QoS cgroup configuration")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
@ -374,7 +374,7 @@ func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error {
|
|||
for resource, percentReserve := range m.qosReserved {
|
||||
switch resource {
|
||||
case v1.ResourceMemory:
|
||||
m.retrySetMemoryReserve(qosConfigs, percentReserve)
|
||||
m.retrySetMemoryReserve(logger, qosConfigs, percentReserve)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -382,12 +382,12 @@ func (m *qosContainerManagerImpl) UpdateCgroups(logger logr.Logger) error {
|
|||
for _, config := range qosConfigs {
|
||||
err := m.cgroupManager.Update(logger, config)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to update QoS cgroup configuration")
|
||||
logger.Error(err, "Failed to update QoS cgroup configuration")
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
klog.V(4).InfoS("Updated QoS cgroup configuration")
|
||||
logger.V(4).Info("Updated QoS cgroup configuration")
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ func TestQoSContainerCgroup(t *testing.T) {
|
|||
},
|
||||
}
|
||||
|
||||
m.setMemoryQoS(qosConfigs)
|
||||
m.setMemoryQoS(logger, qosConfigs)
|
||||
|
||||
burstableMin := resource.MustParse("384Mi")
|
||||
guaranteedMin := resource.MustParse("128Mi")
|
||||
|
|
|
|||
Loading…
Reference in a new issue