From c8138546201712e64604deadf331fb04d6651e55 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Mon, 22 Sep 2025 09:33:31 -0400 Subject: [PATCH] Update comments around an old bug workaround --- cmd/kube-proxy/app/conntrack.go | 24 +++++++++--------------- cmd/kube-proxy/app/server_linux.go | 10 +++------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/cmd/kube-proxy/app/conntrack.go b/cmd/kube-proxy/app/conntrack.go index 83395129807..b8a9eda6011 100644 --- a/cmd/kube-proxy/app/conntrack.go +++ b/cmd/kube-proxy/app/conntrack.go @@ -54,17 +54,12 @@ var errReadOnlySysFS = errors.New("readOnlySysFS") func (rct realConntracker) SetMax(ctx context.Context, max int) error { logger := klog.FromContext(ctx) + logger.Info("Setting nf_conntrack_max", "nfConntrackMax", max) if err := rct.setIntSysCtl(ctx, "nf_conntrack_max", max); err != nil { return err } - logger.Info("Setting nf_conntrack_max", "nfConntrackMax", max) - // Linux does not support writing to /sys/module/nf_conntrack/parameters/hashsize - // when the writer process is not in the initial network namespace - // (https://github.com/torvalds/linux/blob/v4.10/net/netfilter/nf_conntrack_core.c#L1795-L1796). - // Usually that's fine. But in some configurations such as with github.com/kinvolk/kubeadm-nspawn, - // kube-proxy is in another netns. - // Therefore, check if writing in hashsize is necessary and skip the writing if not. + // Check if hashsize is large enough for the nf_conntrack_max value. hashsize, err := readIntStringFile("/sys/module/nf_conntrack/parameters/hashsize") if err != nil { return err @@ -73,13 +68,12 @@ func (rct realConntracker) SetMax(ctx context.Context, max int) error { return nil } - // sysfs is expected to be mounted as 'rw'. However, it may be - // unexpectedly mounted as 'ro' by docker because of a known docker - // issue (https://github.com/docker/docker/issues/24000). Setting - // conntrack will fail when sysfs is readonly. 
When that happens, we - // don't set conntrack hashsize and return a special error - // errReadOnlySysFS here. The caller should deal with - // errReadOnlySysFS differently. + // sysfs is expected to be mounted as 'rw'. However, it may be unexpectedly + // mounted as 'ro' by docker because of known bugs (https://issues.k8s.io/134108). + // In that case we return a special error errReadOnlySysFS here, which the caller + // should deal with specially. + // + // TODO: this workaround can go away once we no longer support containerd 1.7. writable, err := rct.isSysFSWritable(ctx) if err != nil { return err @@ -87,7 +81,7 @@ func (rct realConntracker) SetMax(ctx context.Context, max int) error { if !writable { return errReadOnlySysFS } - // TODO: generify this and sysctl to a new sysfs.WriteInt() + logger.Info("Setting conntrack hashsize", "conntrackHashsize", max/4) return writeIntStringFile("/sys/module/nf_conntrack/parameters/hashsize", max/4) } diff --git a/cmd/kube-proxy/app/server_linux.go b/cmd/kube-proxy/app/server_linux.go index d8ef32359c9..dde1e760c33 100644 --- a/cmd/kube-proxy/app/server_linux.go +++ b/cmd/kube-proxy/app/server_linux.go @@ -304,13 +304,9 @@ func (s *ProxyServer) setupConntrack(ctx context.Context, ct Conntracker) error if err != errReadOnlySysFS { return err } - // errReadOnlySysFS is caused by a known docker issue (https://github.com/docker/docker/issues/24000), - // the only remediation we know is to restart the docker daemon. - // Here we'll send an node event with specific reason and message, the - // administrator should decide whether and how to handle this issue, - // whether to drain the node and restart docker. Occurs in other container runtimes - // as well. - // TODO(random-liu): Remove this when the docker bug is fixed. + // errReadOnlySysFS means we ran into a known container runtime bug + // (https://issues.k8s.io/134108). For historical reasons we ignore + // this problem and just alert the admin that it occurred. 
const message = "CRI error: /sys is read-only: " + "cannot modify conntrack limits, problems may arise later (If running Docker, see docker issue #24000)" s.Recorder.Eventf(s.NodeRef, nil, v1.EventTypeWarning, err.Error(), "StartKubeProxy", message)