diff --git a/pkg/features/kube_features.go b/pkg/features/kube_features.go index fe246814bfd..ab8292b7b78 100644 --- a/pkg/features/kube_features.go +++ b/pkg/features/kube_features.go @@ -762,6 +762,14 @@ const ( // // Allows clients to request a duration for certificates issued via the Kubernetes CSR API. CSRDuration featuregate.Feature = "CSRDuration" + + // owner: @AkihiroSuda + // alpha: v1.22 + // + // Enables support for running kubelet in a user namespace. + // The user namespace has to be created before running kubelet. + // All the node components such as CRI need to be running in the same user namespace. + KubeletInUserNamespace featuregate.Feature = "KubeletInUserNamespace" ) func init() { @@ -876,6 +884,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS ReadWriteOncePod: {Default: false, PreRelease: featuregate.Alpha}, CSRDuration: {Default: true, PreRelease: featuregate.Beta}, DelegateFSGroupToCSIDriver: {Default: false, PreRelease: featuregate.Alpha}, + KubeletInUserNamespace: {Default: false, PreRelease: featuregate.Alpha}, // inherited features from generic apiserver, relisted here to get a conflict if it is changed // unintentionally on either side: diff --git a/pkg/kubelet/cm/container_manager_linux.go b/pkg/kubelet/cm/container_manager_linux.go index d98162ec8a8..03f5aa36503 100644 --- a/pkg/kubelet/cm/container_manager_linux.go +++ b/pkg/kubelet/cm/container_manager_linux.go @@ -39,6 +39,7 @@ import ( utilpath "k8s.io/utils/path" libcontainerdevices "github.com/opencontainers/runc/libcontainer/devices" + libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" utilerrors "k8s.io/apimachinery/pkg/util/errors" @@ -455,6 +456,13 @@ func setupKernelTunables(option KernelTunableBehavior) error { klog.V(2).InfoS("Updating kernel flag", "flag", flag, "expectedValue", expectedValue, "actualValue", val) err = sysctl.SetSysctl(flag, expectedValue) if err != nil { + if libcontaineruserns.RunningInUserNS() { + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletInUserNamespace) { + klog.V(2).InfoS("Updating kernel flag failed (running in UserNS, ignoring)", "flag", flag, "err", err) + continue + } + klog.ErrorS(err, "Updating kernel flag failed (Hint: enable KubeletInUserNamespace feature flag to ignore the error)", "flag", flag) + } errList = append(errList, err) } } diff --git a/pkg/kubelet/kubelet.go b/pkg/kubelet/kubelet.go index 43cad89dba0..4cf6fe201ab 100644 --- a/pkg/kubelet/kubelet.go +++ b/pkg/kubelet/kubelet.go @@ -34,6 +34,7 @@ import ( "k8s.io/client-go/informers" cadvisorapi "github.com/google/cadvisor/info/v1" + libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns" "k8s.io/mount-utils" "k8s.io/utils/integer" @@ -481,7 +482,19 @@ func NewMainKubelet(kubeCfg *kubeletconfiginternal.KubeletConfiguration, oomWatcher, err := oomwatcher.NewWatcher(kubeDeps.Recorder) if err != nil { - return nil, err + if libcontaineruserns.RunningInUserNS() { + if utilfeature.DefaultFeatureGate.Enabled(features.KubeletInUserNamespace) { + // oomwatcher.NewWatcher returns "open /dev/kmsg: operation not permitted" error, + // when running in a user namespace with sysctl value `kernel.dmesg_restrict=1`. + klog.V(2).InfoS("Failed to create an oomWatcher (running in UserNS, ignoring)", "err", err) + oomWatcher = nil + } else { + klog.ErrorS(err, "Failed to create an oomWatcher (running in UserNS, Hint: enable KubeletInUserNamespace feature flag to ignore the error)") + return nil, err + } + } else { + return nil, err + } } clusterDNS := make([]net.IP, 0, len(kubeCfg.ClusterDNS)) @@ -1360,8 +1373,10 @@ func (kl *Kubelet) initializeModules() error { } // Start out of memory watcher. - if err := kl.oomWatcher.Start(kl.nodeRef); err != nil { - return fmt.Errorf("failed to start OOM watcher %v", err) + if kl.oomWatcher != nil { + if err := kl.oomWatcher.Start(kl.nodeRef); err != nil { + return fmt.Errorf("failed to start OOM watcher: %w", err) + } } // Start resource analyzer diff --git a/pkg/proxy/userspace/proxier.go b/pkg/proxy/userspace/proxier.go index b9c14d28a4e..381fa842a2f 100644 --- a/pkg/proxy/userspace/proxier.go +++ b/pkg/proxy/userspace/proxier.go @@ -26,14 +26,17 @@ import ( "sync/atomic" "time" + libcontaineruserns "github.com/opencontainers/runc/libcontainer/userns" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/types" utilerrors "k8s.io/apimachinery/pkg/util/errors" utilnet "k8s.io/apimachinery/pkg/util/net" "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" + utilfeature "k8s.io/apiserver/pkg/util/feature" servicehelper "k8s.io/cloud-provider/service/helpers" "k8s.io/klog/v2" + kubefeatures "k8s.io/kubernetes/pkg/features" "k8s.io/kubernetes/pkg/proxy" "k8s.io/kubernetes/pkg/proxy/config" utilproxy "k8s.io/kubernetes/pkg/proxy/util" @@ -231,7 +234,11 @@ func NewCustomProxier(loadBalancer LoadBalancer, listenIP net.IP, iptables iptab err = setRLimit(64 * 1000) if err != nil { - return nil, fmt.Errorf("failed to set open file handler limit: %v", err) + if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.KubeletInUserNamespace) && libcontaineruserns.RunningInUserNS() { + klog.V(2).InfoS("Failed to set open file handler limit to 64000 (running in UserNS, ignoring)", "err", err) + } else { + return nil, fmt.Errorf("failed to set open file handler limit to 64000: %w", err) + } } proxyPorts := newPortAllocator(pr)