From bc8aa8c0675f9c536b46944ca2de6c952ca18665 Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Thu, 4 Dec 2025 10:30:00 -0500 Subject: [PATCH 1/2] Remove incorrect special-case when Endpoints move between EndpointSlices The code was assuming that if an Endpoint got moved from one slice to another, and one is "local" but the other isn't, then we should prefer the local one. But this doesn't make sense; if it's actually the same Endpoint (i.e., same targetRef) then both copies will have the same Hostname. And if it's not the same Endpoint, then one of the two Endpoints is wrong, but there's no reason to assume it's the non-local one. --- pkg/proxy/endpointschangetracker_test.go | 12 ++++++------ pkg/proxy/endpointslicecache.go | 10 ++++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pkg/proxy/endpointschangetracker_test.go b/pkg/proxy/endpointschangetracker_test.go index 0f1a408205d..923c3ccf6e2 100644 --- a/pkg/proxy/endpointschangetracker_test.go +++ b/pkg/proxy/endpointschangetracker_test.go @@ -1321,8 +1321,8 @@ func TestEndpointSliceUpdate(t *testing.T) { // test additions to existing state with partially overlapping slices and ports "add a slice that overlaps with existing state and partial ports": { startingSlices: []*discovery.EndpointSlice{ - generateEndpointSlice("svc1", "ns1", 1, 3, 999, 999, []string{"host1", "host2"}, []*int32{ptr.To[int32](80), ptr.To[int32](443)}), - generateEndpointSlice("svc1", "ns1", 2, 2, 999, 999, []string{"host1", "host2"}, []*int32{ptr.To[int32](80), ptr.To[int32](443)}), + generateEndpointSlice("svc1", "ns1", 1, 3, 999, 999, []string{"host1"}, []*int32{ptr.To[int32](80), ptr.To[int32](443)}), + generateEndpointSlice("svc1", "ns1", 2, 2, 999, 999, []string{"host1"}, []*int32{ptr.To[int32](80), ptr.To[int32](443)}), }, endpointsChangeTracker: NewEndpointsChangeTracker(v1.IPv4Protocol, "host1", nil, nil), namespacedName: types.NamespacedName{Name: "svc1", Namespace: "ns1"}, @@ -1336,14 +1336,14 @@ func TestEndpointSliceUpdate(t *testing.T) { &BaseEndpointInfo{ip: "10.0.1.3", port: 80, endpoint: "10.0.1.3:80", isLocal: true, ready: true, serving: true, terminating: false}, &BaseEndpointInfo{ip: "10.0.1.4", port: 80, endpoint: "10.0.1.4:80", isLocal: true, ready: true, serving: true, terminating: false}, &BaseEndpointInfo{ip: "10.0.1.5", port: 80, endpoint: "10.0.1.5:80", isLocal: true, ready: true, serving: true, terminating: false}, - &BaseEndpointInfo{ip: "10.0.2.1", port: 80, endpoint: "10.0.2.1:80", isLocal: false, ready: true, serving: true, terminating: false}, + &BaseEndpointInfo{ip: "10.0.2.1", port: 80, endpoint: "10.0.2.1:80", isLocal: true, ready: true, serving: true, terminating: false}, &BaseEndpointInfo{ip: "10.0.2.2", port: 80, endpoint: "10.0.2.2:80", isLocal: true, ready: true, serving: true, terminating: false}, }, makeServicePortName("ns1", "svc1", "port-1", v1.ProtocolTCP): { - &BaseEndpointInfo{ip: "10.0.1.1", port: 443, endpoint: "10.0.1.1:443", isLocal: false, ready: true, serving: true, terminating: false}, + &BaseEndpointInfo{ip: "10.0.1.1", port: 443, endpoint: "10.0.1.1:443", isLocal: true, ready: true, serving: true, terminating: false}, &BaseEndpointInfo{ip: "10.0.1.2", port: 443, endpoint: "10.0.1.2:443", isLocal: true, ready: true, serving: true, terminating: false}, - &BaseEndpointInfo{ip: "10.0.1.3", port: 443, endpoint: "10.0.1.3:443", isLocal: false, ready: true, serving: true, terminating: false}, - &BaseEndpointInfo{ip: "10.0.2.1", port: 443, endpoint: "10.0.2.1:443", isLocal: false, ready: true, serving: true, terminating: false}, + &BaseEndpointInfo{ip: "10.0.1.3", port: 443, endpoint: "10.0.1.3:443", isLocal: true, ready: true, serving: true, terminating: false}, + &BaseEndpointInfo{ip: "10.0.2.1", port: 443, endpoint: "10.0.2.1:443", isLocal: true, ready: true, serving: true, terminating: false}, &BaseEndpointInfo{ip: "10.0.2.2", port: 443, endpoint: "10.0.2.2:443", isLocal: true, ready: true, serving: true, terminating: false}, }, }, diff --git a/pkg/proxy/endpointslicecache.go b/pkg/proxy/endpointslicecache.go index 1d1eea4f40a..648d5c4a21b 100644 --- a/pkg/proxy/endpointslicecache.go +++ b/pkg/proxy/endpointslicecache.go @@ -232,10 +232,12 @@ func (cache *EndpointSliceCache) addEndpoints(svcPortName *ServicePortName, port endpointInfo := newBaseEndpointInfo(endpointIP, portNum, isLocal, ready, serving, terminating, zoneHints, nodeHints) - // This logic ensures we're deduplicating potential overlapping endpoints - // isLocal should not vary between matching endpoints, but if it does, we - // favor a true value here if it exists. - if _, exists := endpointSet[endpointInfo.String()]; !exists || isLocal { + // If an Endpoint gets moved from one slice to another, we may temporarily + // see it in both slices. Ideally we want to prefer the Endpoint from the + // more-recently-updated EndpointSlice, since it may have newer + // conditions. But we can't easily figure that out, and the situation will + // resolve itself once we receive the second EndpointSlice update anyway. + if _, exists := endpointSet[endpointInfo.String()]; !exists { endpointSet[endpointInfo.String()] = cache.makeEndpointInfo(endpointInfo, svcPortName) } } From e1b20366e38717e40b5b6b885ffa4ee81778547b Mon Sep 17 00:00:00 2001 From: Dan Winship Date: Thu, 4 Dec 2025 10:35:34 -0500 Subject: [PATCH 2/2] Handle the case of a pod IP being reused while the old Pod still exists If the pod network reuses a pod IP while the old pod is still terminating, then we may temporarily see two Endpoints for that IP. In that case, prefer the non-terminating one. --- pkg/proxy/endpointslicecache.go | 9 +++++- pkg/proxy/endpointslicecache_test.go | 42 ++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pkg/proxy/endpointslicecache.go b/pkg/proxy/endpointslicecache.go index 648d5c4a21b..abccefd43dd 100644 --- a/pkg/proxy/endpointslicecache.go +++ b/pkg/proxy/endpointslicecache.go @@ -237,7 +237,14 @@ func (cache *EndpointSliceCache) addEndpoints(svcPortName *ServicePortName, port // more-recently-updated EndpointSlice, since it may have newer // conditions. But we can't easily figure that out, and the situation will // resolve itself once we receive the second EndpointSlice update anyway. - if _, exists := endpointSet[endpointInfo.String()]; !exists { + // + // On the other hand, there maybe also be two *different* Endpoints (i.e., + // with different targetRefs) that point to the same IP, if the pod + // network reuses the IP from a terminating pod before the Pod object is + // fully deleted. In this case we want to prefer the running pod over the + // terminating one. (If there are multiple non-terminating pods with the + // same podIP, then the result is undefined.) + if _, exists := endpointSet[endpointInfo.String()]; !exists || !terminating { endpointSet[endpointInfo.String()] = cache.makeEndpointInfo(endpointInfo, svcPortName) } } diff --git a/pkg/proxy/endpointslicecache_test.go b/pkg/proxy/endpointslicecache_test.go index 610bb271c16..54a37ec3bad 100644 --- a/pkg/proxy/endpointslicecache_test.go +++ b/pkg/proxy/endpointslicecache_test.go @@ -311,6 +311,48 @@ func TestEndpointInfoByServicePort(t *testing.T) { }, }, }, + "with duplicate Endpoints, prefer non-terminating": { + namespacedName: types.NamespacedName{Name: "svc1", Namespace: "ns1"}, + hostname: "host1", + endpointSlices: []*discovery.EndpointSlice{ + func() *discovery.EndpointSlice { + es := generateEndpointSliceWithOffset("svc1", "ns1", 1, 1, 1, 999, 999, []string{"host1"}, []*int32{ptr.To[int32](80)}) + + // Duplicate the endpoint + ep1 := es.Endpoints[0] + ep1.Conditions.Ready = ptr.To(false) + ep1.Conditions.Serving = ptr.To(true) + ep1.Conditions.Terminating = ptr.To(true) + + ep2 := es.Endpoints[0] + ep2.Conditions.Ready = ptr.To(true) + ep2.Conditions.Serving = ptr.To(true) + ep2.Conditions.Terminating = ptr.To(false) + + ep3 := es.Endpoints[0] + ep3.Conditions.Ready = ptr.To(false) + ep3.Conditions.Serving = ptr.To(false) + ep3.Conditions.Terminating = ptr.To(true) + + // ep2 should win since it's the non-terminating one + es.Endpoints = []discovery.Endpoint{ep1, ep2, ep3} + return es + }(), + }, + expectedMap: spToEndpointMap{ + makeServicePortName("ns1", "svc1", "port-0", v1.ProtocolTCP): { + "10.0.1.1:80": &BaseEndpointInfo{ + ip: "10.0.1.1", + port: 80, + endpoint: "10.0.1.1:80", + isLocal: true, + ready: true, + serving: true, + terminating: false, + }, + }, + }, + }, } for name, tc := range testCases {