From 9daabbd6c73aac543ef8dfd5326098eb4accfa23 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Fri, 24 Apr 2026 11:33:01 +0000 Subject: [PATCH] kube-proxy: don't do full periodic syncs in large cluster mode Periodic full syncs are only a reconcile loop that guards against the dataplane having somehow drifted; however, they have a significant cost on large clusters. We can skip the periodic full sync when kube-proxy is in "largecluster" mode: we are already doing some optimizations there, so it is reasonable to also avoid the penalty of a full sync for a "just in case" operation. Signed-off-by: Antonio Ojea --- pkg/proxy/iptables/proxier.go | 4 +++- pkg/proxy/iptables/proxier_test.go | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pkg/proxy/iptables/proxier.go b/pkg/proxy/iptables/proxier.go index 3e4e52094e4..4f620ad65bd 100644 --- a/pkg/proxy/iptables/proxier.go +++ b/pkg/proxy/iptables/proxier.go @@ -648,7 +648,9 @@ func (proxier *Proxier) syncProxyRules() (retryError error) { // Keep track of how long syncs take. start := time.Now() - doFullSync := proxier.needFullSync || (time.Since(proxier.lastFullSync) > proxyutil.FullSyncPeriod) + doFullSync := proxier.needFullSync || + // Avoid regular full syncs for large clusters. 
+ ((time.Since(proxier.lastFullSync) > proxyutil.FullSyncPeriod) && !proxier.largeClusterMode) defer func() { metrics.SyncProxyRulesLatency.WithLabelValues(string(proxier.ipFamily)).Observe(metrics.SinceInSeconds(start)) diff --git a/pkg/proxy/iptables/proxier_test.go b/pkg/proxy/iptables/proxier_test.go index 99e63c428bb..f0020c10d9b 100644 --- a/pkg/proxy/iptables/proxier_test.go +++ b/pkg/proxy/iptables/proxier_test.go @@ -5639,6 +5639,21 @@ func TestSyncProxyRulesLargeClusterMode(t *testing.T) { t.Errorf("numComments (%d) != 0 after partial resync when numEndpoints (%d) > threshold (%d)", numComments, expectedEndpoints+3, largeClusterEndpointsThreshold) } + // Even if FullSyncPeriod has elapsed, large-cluster mode should keep this as + // a partial resync when there are no explicit changes requiring a full sync. + if !fp.largeClusterMode { + t.Fatalf("expected to be in large cluster mode") + } + expectedLastFullSync := time.Now().Add(-proxyutil.FullSyncPeriod).Add(-time.Second) + fp.lastFullSync = expectedLastFullSync + err := fp.syncProxyRules() + if err != nil { + t.Fatalf("syncProxyRules failed: %v", err) + } + if !fp.lastFullSync.Equal(expectedLastFullSync) { + t.Fatalf("expected periodic sync in large cluster mode to skip full sync: lastFullSync changed from %v to %v", expectedLastFullSync, fp.lastFullSync) + } + // Now force a full resync and confirm that it rewrites the older services with // no comments as well. fp.forceSyncProxyRules()