Update comments around an old bug workaround

This commit is contained in:
Dan Winship 2025-09-22 09:33:31 -04:00
parent a8a21aaf85
commit c813854620
2 changed files with 12 additions and 22 deletions

View file

@ -54,17 +54,12 @@ var errReadOnlySysFS = errors.New("readOnlySysFS")
func (rct realConntracker) SetMax(ctx context.Context, max int) error {
logger := klog.FromContext(ctx)
logger.Info("Setting nf_conntrack_max", "nfConntrackMax", max)
if err := rct.setIntSysCtl(ctx, "nf_conntrack_max", max); err != nil {
return err
}
logger.Info("Setting nf_conntrack_max", "nfConntrackMax", max)
// Linux does not support writing to /sys/module/nf_conntrack/parameters/hashsize
// when the writer process is not in the initial network namespace
// (https://github.com/torvalds/linux/blob/v4.10/net/netfilter/nf_conntrack_core.c#L1795-L1796).
// Usually that's fine. But in some configurations such as with github.com/kinvolk/kubeadm-nspawn,
// kube-proxy is in another netns.
// Therefore, check if writing in hashsize is necessary and skip the writing if not.
// Check if hashsize is large enough for the nf_conntrack_max value.
hashsize, err := readIntStringFile("/sys/module/nf_conntrack/parameters/hashsize")
if err != nil {
return err
@ -73,13 +68,12 @@ func (rct realConntracker) SetMax(ctx context.Context, max int) error {
return nil
}
// sysfs is expected to be mounted as 'rw'. However, it may be
// unexpectedly mounted as 'ro' by docker because of a known docker
// issue (https://github.com/docker/docker/issues/24000). Setting
// conntrack will fail when sysfs is readonly. When that happens, we
// don't set conntrack hashsize and return a special error
// errReadOnlySysFS here. The caller should deal with
// errReadOnlySysFS differently.
// sysfs is expected to be mounted as 'rw'. However, it may be unexpectedly
// mounted as 'ro' by docker because of known bugs (https://issues.k8s.io/134108).
// In that case we return a special error errReadOnlySysFS here, which the caller
// should deal with specially.
//
// TODO: this workaround can go away once we no longer support containerd 1.7.
writable, err := rct.isSysFSWritable(ctx)
if err != nil {
return err
@ -87,7 +81,7 @@ func (rct realConntracker) SetMax(ctx context.Context, max int) error {
if !writable {
return errReadOnlySysFS
}
// TODO: generify this and sysctl to a new sysfs.WriteInt()
logger.Info("Setting conntrack hashsize", "conntrackHashsize", max/4)
return writeIntStringFile("/sys/module/nf_conntrack/parameters/hashsize", max/4)
}

View file

@ -304,13 +304,9 @@ func (s *ProxyServer) setupConntrack(ctx context.Context, ct Conntracker) error
if err != errReadOnlySysFS {
return err
}
// errReadOnlySysFS is caused by a known docker issue (https://github.com/docker/docker/issues/24000),
// the only remediation we know is to restart the docker daemon.
// Here we'll send a node event with specific reason and message, the
// administrator should decide whether and how to handle this issue,
// whether to drain the node and restart docker. Occurs in other container runtimes
// as well.
// TODO(random-liu): Remove this when the docker bug is fixed.
// errReadOnlySysFS means we ran into a known container runtime bug
// (https://issues.k8s.io/134108). For historical reasons we ignore
// this problem and just alert the admin that it occurred.
const message = "CRI error: /sys is read-only: " +
"cannot modify conntrack limits, problems may arise later (If running Docker, see docker issue #24000)"
s.Recorder.Eventf(s.NodeRef, nil, v1.EventTypeWarning, err.Error(), "StartKubeProxy", message)