Merge pull request #96863 from tosi3k/automated-cherry-pick-of-#90218-#90242-upstream-release-1.18

Automated cherry pick of #90218: Lazy initialization of network urls for GCE provider #90242: Avoid unnecessary GCE API calls for IP-alias calls
This commit is contained in:
Kubernetes Prow Robot 2020-11-27 07:30:47 -08:00 committed by GitHub
commit ded8a1e285
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 106 additions and 62 deletions

View file

@ -21,6 +21,7 @@ package ipam
import (
"context"
"encoding/json"
"fmt"
"net"
"k8s.io/klog"
@ -60,8 +61,12 @@ func newAdapter(k8s clientset.Interface, cloud *gce.Cloud) *adapter {
return ret
}
func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error) {
cidrs, err := a.cloud.AliasRanges(types.NodeName(nodeName))
func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) {
if node.Spec.ProviderID == "" {
return nil, fmt.Errorf("node %s doesn't have providerID", node.Name)
}
cidrs, err := a.cloud.AliasRangesByProviderID(node.Spec.ProviderID)
if err != nil {
return nil, err
}
@ -72,7 +77,7 @@ func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error
case 1:
break
default:
klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", nodeName, cidrs)
klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", node.Name, cidrs)
}
_, cidrRange, err := net.ParseCIDR(cidrs[0])
@ -83,8 +88,12 @@ func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error
return cidrRange, nil
}
func (a *adapter) AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error {
return a.cloud.AddAliasToInstance(types.NodeName(nodeName), cidrRange)
func (a *adapter) AddAlias(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error {
if node.Spec.ProviderID == "" {
return fmt.Errorf("node %s doesn't have providerID", node.Name)
}
return a.cloud.AddAliasToInstanceByProviderID(node.Spec.ProviderID, cidrRange)
}
func (a *adapter) Node(ctx context.Context, name string) (*v1.Node, error) {

View file

@ -249,8 +249,11 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDR: %v", nodeName, err)
return err
}
if node.Spec.ProviderID == "" {
return fmt.Errorf("node %s doesn't have providerID", nodeName)
}
cidrs, err := ca.cloud.AliasRanges(types.NodeName(nodeName))
cidrs, err := ca.cloud.AliasRangesByProviderID(node.Spec.ProviderID)
if err != nil {
nodeutil.RecordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable")
return fmt.Errorf("failed to allocate cidr: %v", err)

View file

@ -43,9 +43,9 @@ const (
// cloudAlias is the interface to the cloud platform APIs.
type cloudAlias interface {
// Alias returns the IP alias for the node.
Alias(ctx context.Context, nodeName string) (*net.IPNet, error)
Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error)
// AddAlias adds an alias to the node.
AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error
AddAlias(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error
}
// kubeAPI is the interface to the Kubernetes APIs.
@ -204,7 +204,7 @@ func (op *updateOp) run(sync *NodeSync) error {
op.node = node
}
aliasRange, err := sync.cloudAlias.Alias(ctx, sync.nodeName)
aliasRange, err := sync.cloudAlias.Alias(ctx, op.node)
if err != nil {
klog.Errorf("Error getting cloud alias for node %q: %v", sync.nodeName, err)
return err
@ -293,7 +293,7 @@ func (op *updateOp) updateAliasFromNode(ctx context.Context, sync *NodeSync, nod
return err
}
if err := sync.cloudAlias.AddAlias(ctx, node.Name, aliasRange); err != nil {
if err := sync.cloudAlias.AddAlias(ctx, node, aliasRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", aliasRange, node.Name, err)
return err
}
@ -325,7 +325,7 @@ func (op *updateOp) allocateRange(ctx context.Context, sync *NodeSync, node *v1.
// If addAlias returns a hard error, cidrRange will be leaked as there
// is no durable record of the range. The missing space will be
// recovered on the next restart of the controller.
if err := sync.cloudAlias.AddAlias(ctx, node.Name, cidrRange); err != nil {
if err := sync.cloudAlias.AddAlias(ctx, node, cidrRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", cidrRange, node.Name, err)
return err
}

View file

@ -58,13 +58,13 @@ type fakeAPIs struct {
results []error
}
func (f *fakeAPIs) Alias(ctx context.Context, nodeName string) (*net.IPNet, error) {
f.calls = append(f.calls, fmt.Sprintf("alias %v", nodeName))
func (f *fakeAPIs) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) {
f.calls = append(f.calls, fmt.Sprintf("alias %v", node.Name))
return f.aliasRange, f.aliasErr
}
func (f *fakeAPIs) AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error {
f.calls = append(f.calls, fmt.Sprintf("addAlias %v %v", nodeName, cidrRange))
func (f *fakeAPIs) AddAlias(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error {
f.calls = append(f.calls, fmt.Sprintf("addAlias %v %v", node.Name, cidrRange))
return f.addAliasErr
}

View file

@ -99,6 +99,13 @@ type Cloud struct {
// for the cloudprovider to start watching the configmap.
ClusterID ClusterID
// initializer is used for lazy initialization of subnetworkURL
// and isLegacyNetwork fields if they are not passed via the config.
// The reason is to avoid GCE API calls to initialize them if they
// will never be used. This is especially important when
// it is run from Kubelets, as there can be thousands of them.
subnetworkURLAndIsLegacyNetworkInitializer sync.Once
service *compute.Service
serviceBeta *computebeta.Service
serviceAlpha *computealpha.Service
@ -115,10 +122,14 @@ type Cloud struct {
// managedZones will be set to the 1 zone if running a single zone cluster
// it will be set to ALL zones in region for any multi-zone cluster
// Use GetAllCurrentZones to get only zones that contain nodes
managedZones []string
networkURL string
isLegacyNetwork bool
subnetworkURL string
managedZones []string
networkURL string
// unsafeIsLegacyNetwork should be used only via IsLegacyNetwork() accessor,
// to ensure it was properly initialized.
unsafeIsLegacyNetwork bool
// unsafeSubnetworkURL should be used only via SubnetworkURL() accessor,
// to ensure it was properly initialized.
unsafeSubnetworkURL string
secondaryRangeName string
networkProjectID string
onXPN bool
@ -465,32 +476,12 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
subnetURL = config.SubnetworkURL
} else if config.SubnetworkName != "" {
subnetURL = gceSubnetworkURL(config.APIEndpoint, netProjID, config.Region, config.SubnetworkName)
} else {
// Determine the type of network and attempt to discover the correct subnet for AUTO mode.
// Gracefully fail because kubelet calls CreateGCECloud without any config, and minions
// lack the proper credentials for API calls.
if networkName := lastComponent(networkURL); networkName != "" {
var n *compute.Network
if n, err = getNetwork(service, netProjID, networkName); err != nil {
klog.Warningf("Could not retrieve network %q; err: %v", networkName, err)
} else {
switch typeOfNetwork(n) {
case netTypeLegacy:
klog.Infof("Network %q is type legacy - no subnetwork", networkName)
isLegacyNetwork = true
case netTypeCustom:
klog.Warningf("Network %q is type custom - cannot auto select a subnetwork", networkName)
case netTypeAuto:
subnetURL, err = determineSubnetURL(service, netProjID, networkName, config.Region)
if err != nil {
klog.Warningf("Could not determine subnetwork for network %q and region %v; err: %v", networkName, config.Region, err)
} else {
klog.Infof("Auto selecting subnetwork %q", subnetURL)
}
}
}
}
}
// If neither SubnetworkURL nor SubnetworkName are provided, defer to
// lazy initialization. Determining subnetURL and isLegacyNetwork requires
// GCE API call. Given that it's not used in many cases and the fact that
// the provider is initialized also for Kubelets (and there can be thousands
// of them) we defer to lazy initialization here.
if len(config.ManagedZones) == 0 {
config.ManagedZones, err = getZonesForRegion(service, config.ProjectID, config.Region)
@ -518,8 +509,8 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
localZone: config.Zone,
managedZones: config.ManagedZones,
networkURL: networkURL,
isLegacyNetwork: isLegacyNetwork,
subnetworkURL: subnetURL,
unsafeIsLegacyNetwork: isLegacyNetwork,
unsafeSubnetworkURL: subnetURL,
secondaryRangeName: config.SecondaryRangeName,
nodeTags: config.NodeTags,
nodeInstancePrefix: config.NodeInstancePrefix,
@ -542,6 +533,45 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
return gce, nil
}
// initializeNetworkConfig() is supposed to be called under sync.Once()
// for accessors to subnetworkURL and isLegacyNetwork fields.
func (g *Cloud) initializeSubnetworkURLAndIsLegacyNetwork() {
if g.unsafeSubnetworkURL != "" {
// This has already been initialized via the config.
return
}
var subnetURL string
var isLegacyNetwork bool
// Determine the type of network and attempt to discover the correct subnet for AUTO mode.
// Gracefully fail because kubelet calls CreateGCECloud without any config, and minions
// lack the proper credentials for API calls.
if networkName := lastComponent(g.NetworkURL()); networkName != "" {
if n, err := getNetwork(g.service, g.NetworkProjectID(), networkName); err != nil {
klog.Warningf("Could not retrieve network %q; err: %v", networkName, err)
} else {
switch typeOfNetwork(n) {
case netTypeLegacy:
klog.Infof("Network %q is type legacy - no subnetwork", networkName)
isLegacyNetwork = true
case netTypeCustom:
klog.Warningf("Network %q is type custom - cannot auto select a subnetwork", networkName)
case netTypeAuto:
subnetURL, err = determineSubnetURL(g.service, g.NetworkProjectID(), networkName, g.Region())
if err != nil {
klog.Warningf("Could not determine subnetwork for network %q and region %v; err: %v", networkName, g.Region(), err)
} else {
klog.Infof("Auto selecting subnetwork %q", subnetURL)
}
}
}
}
g.unsafeSubnetworkURL = subnetURL
g.unsafeIsLegacyNetwork = isLegacyNetwork
}
// SetRateLimiter adds a custom cloud.RateLimiter implementation.
// WARNING: Calling this could have unexpected behavior if you have in-flight
// requests. It is best to use this immediately after creating a Cloud.
@ -672,12 +702,14 @@ func (g *Cloud) NetworkURL() string {
// SubnetworkURL returns the subnetwork url
func (g *Cloud) SubnetworkURL() string {
return g.subnetworkURL
g.subnetworkURLAndIsLegacyNetworkInitializer.Do(g.initializeSubnetworkURLAndIsLegacyNetwork)
return g.unsafeSubnetworkURL
}
// IsLegacyNetwork returns true if the cluster is still running a legacy network configuration.
func (g *Cloud) IsLegacyNetwork() bool {
return g.isLegacyNetwork
g.subnetworkURLAndIsLegacyNetworkInitializer.Do(g.initializeSubnetworkURLAndIsLegacyNetwork)
return g.unsafeIsLegacyNetwork
}
// SetInformers sets up the zone handlers we need watching for node changes.

View file

@ -361,21 +361,20 @@ func (g *Cloud) CurrentNodeName(ctx context.Context, hostname string) (types.Nod
return types.NodeName(hostname), nil
}
// AliasRanges returns a list of CIDR ranges that are assigned to the
// AliasRangesByProviderID returns a list of CIDR ranges that are assigned to the
// `node` for allocation to pods. Returns a list of the form
// "<ip>/<netmask>".
func (g *Cloud) AliasRanges(nodeName types.NodeName) (cidrs []string, err error) {
func (g *Cloud) AliasRangesByProviderID(providerID string) (cidrs []string, err error) {
ctx, cancel := cloud.ContextWithCallTimeout()
defer cancel()
var instance *gceInstance
instance, err = g.getInstanceByName(mapNodeNameToInstanceName(nodeName))
_, zone, name, err := splitProviderID(providerID)
if err != nil {
return
return nil, err
}
var res *computebeta.Instance
res, err = g.c.BetaInstances().Get(ctx, meta.ZonalKey(instance.Name, lastComponent(instance.Zone)))
res, err = g.c.BetaInstances().Get(ctx, meta.ZonalKey(canonicalizeInstanceName(name), zone))
if err != nil {
return
}
@ -388,28 +387,29 @@ func (g *Cloud) AliasRanges(nodeName types.NodeName) (cidrs []string, err error)
return
}
// AddAliasToInstance adds an alias to the given instance from the named
// AddAliasToInstanceByProviderID adds an alias to the given instance from the named
// secondary range.
func (g *Cloud) AddAliasToInstance(nodeName types.NodeName, alias *net.IPNet) error {
func (g *Cloud) AddAliasToInstanceByProviderID(providerID string, alias *net.IPNet) error {
ctx, cancel := cloud.ContextWithCallTimeout()
defer cancel()
v1instance, err := g.getInstanceByName(mapNodeNameToInstanceName(nodeName))
_, zone, name, err := splitProviderID(providerID)
if err != nil {
return err
}
instance, err := g.c.BetaInstances().Get(ctx, meta.ZonalKey(v1instance.Name, lastComponent(v1instance.Zone)))
instance, err := g.c.BetaInstances().Get(ctx, meta.ZonalKey(canonicalizeInstanceName(name), zone))
if err != nil {
return err
}
switch len(instance.NetworkInterfaces) {
case 0:
return fmt.Errorf("instance %q has no network interfaces", nodeName)
return fmt.Errorf("instance %q has no network interfaces", providerID)
case 1:
default:
klog.Warningf("Instance %q has more than one network interface, using only the first (%v)",
nodeName, instance.NetworkInterfaces)
providerID, instance.NetworkInterfaces)
}
iface := &computebeta.NetworkInterface{}
@ -420,7 +420,7 @@ func (g *Cloud) AddAliasToInstance(nodeName types.NodeName, alias *net.IPNet) er
SubnetworkRangeName: g.secondaryRangeName,
})
mc := newInstancesMetricContext("add_alias", v1instance.Zone)
mc := newInstancesMetricContext("add_alias", zone)
err = g.c.BetaInstances().UpdateNetworkInterface(ctx, meta.ZonalKey(instance.Name, lastComponent(instance.Zone)), iface.Name, iface)
return mc.Observe(err)
}

View file

@ -78,7 +78,7 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v
}
scheme := cloud.SchemeInternal
options := getILBOptions(svc)
if g.isLegacyNetwork {
if g.IsLegacyNetwork() {
g.eventRecorder.Event(svc, v1.EventTypeWarning, "ILBOptionsIgnored", "Internal LoadBalancer options are not supported with Legacy Networks.")
options = ILBOptions{}
}