2016-08-24 21:08:12 -04:00
/ *
Copyright 2014 The Kubernetes Authors .
Licensed under the Apache License , Version 2.0 ( the "License" ) ;
you may not use this file except in compliance with the License .
You may obtain a copy of the License at
http : //www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing , software
distributed under the License is distributed on an "AS IS" BASIS ,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
See the License for the specific language governing permissions and
limitations under the License .
* /
2019-10-24 21:34:25 -04:00
package network
2016-08-24 21:08:12 -04:00
import (
2020-02-07 21:16:47 -05:00
"context"
2020-10-30 14:09:50 -04:00
"crypto/tls"
2016-08-24 21:08:12 -04:00
"encoding/json"
"fmt"
2016-12-29 18:35:47 -05:00
"io/ioutil"
"net"
"net/http"
"strconv"
2016-08-24 21:08:12 -04:00
"strings"
"time"
2019-03-28 20:31:25 -04:00
"github.com/onsi/ginkgo"
2019-07-01 12:55:04 -04:00
v1 "k8s.io/api/core/v1"
2020-04-07 04:11:30 -04:00
apierrors "k8s.io/apimachinery/pkg/api/errors"
2017-01-11 09:09:48 -05:00
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
2017-01-27 15:42:17 -05:00
"k8s.io/apimachinery/pkg/util/intstr"
2017-01-11 09:09:48 -05:00
utilnet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apimachinery/pkg/util/sets"
2017-01-24 09:35:22 -05:00
"k8s.io/apimachinery/pkg/util/uuid"
2017-01-11 09:09:48 -05:00
"k8s.io/apimachinery/pkg/util/wait"
2017-06-23 16:56:37 -04:00
clientset "k8s.io/client-go/kubernetes"
coreclientset "k8s.io/client-go/kubernetes/typed/core/v1"
2019-10-24 21:34:25 -04:00
"k8s.io/kubernetes/test/e2e/framework"
2019-05-24 02:47:40 -04:00
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
2019-05-07 20:09:50 -04:00
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
2020-01-17 03:41:35 -05:00
e2eskipper "k8s.io/kubernetes/test/e2e/framework/skipper"
2020-03-24 02:00:50 -04:00
e2essh "k8s.io/kubernetes/test/e2e/framework/ssh"
2017-08-29 04:32:08 -04:00
imageutils "k8s.io/kubernetes/test/utils/image"
2020-11-11 17:04:33 -05:00
netutils "k8s.io/utils/net"
2016-08-24 21:08:12 -04:00
)
const (
2019-03-28 20:31:25 -04:00
// EndpointHTTPPort is an endpoint HTTP port for testing.
2021-04-22 16:17:10 -04:00
EndpointHTTPPort = 8083
2019-03-28 20:31:25 -04:00
// EndpointUDPPort is an endpoint UDP port for testing.
2020-02-07 06:28:50 -05:00
EndpointUDPPort = 8081
// EndpointSCTPPort is an endpoint SCTP port for testing.
2020-09-15 11:07:06 -04:00
EndpointSCTPPort = 8082
// testContainerHTTPPort is the test container http port.
testContainerHTTPPort = 9080
2019-03-28 20:31:25 -04:00
// ClusterHTTPPort is a cluster HTTP port for testing.
ClusterHTTPPort = 80
// ClusterUDPPort is a cluster UDP port for testing.
2020-02-07 06:28:50 -05:00
ClusterUDPPort = 90
// ClusterSCTPPort is a cluster SCTP port for testing.
ClusterSCTPPort = 95
2017-11-05 14:25:55 -05:00
testPodName = "test-container-pod"
hostTestPodName = "host-test-container-pod"
nodePortServiceName = "node-port-service"
sessionAffinityServiceName = "session-affinity-service"
2016-11-04 23:27:39 -04:00
// Wait time between poll attempts of a Service VIP and/or nodePort.
// Coupled with testTries, it produces a net timeout value.
hitEndpointRetryDelay = 2 * time . Second
2016-08-24 21:08:12 -04:00
// Number of retries to hit a given set of endpoints. Needs to be high
// because we verify iptables statistical rr loadbalancing.
testTries = 30
2016-09-14 06:27:20 -04:00
// Maximum number of pods in a test, to make test work in large clusters.
2016-10-19 08:30:40 -04:00
maxNetProxyPodsCount = 10
2019-03-28 20:31:25 -04:00
// SessionAffinityChecks is the number of checks made against a given set of endpoints when session affinity is enabled.
2017-08-10 05:06:14 -04:00
SessionAffinityChecks = 10
2019-05-08 09:19:58 -04:00
// RegexIPv4 is a regex to match IPv4 addresses
2019-04-24 17:37:24 -04:00
RegexIPv4 = "(?:\\d+)\\.(?:\\d+)\\.(?:\\d+)\\.(?:\\d+)"
2019-05-08 09:19:58 -04:00
// RegexIPv6 is a regex to match IPv6 addresses
2019-10-24 21:34:25 -04:00
RegexIPv6 = "(?:(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){6})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:::(?:(?:(?:[0-9a-fA-F]{1,4})):){5})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:(?:[0-9a-fA-F]{1,4})):){4})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,1}(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:(?:[0-9a-fA-F]{1,4})):){3})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,2}(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:(?:[0-9a-fA-F]{1,4})):){2})(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,3}(?:(?:[0-9a-fA-F]{1,4})))?::(?:(?:[0-9a-fA-F]{1,4})):)(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,4}(?:(?:[0-9a-fA-F]{1,4})))?::)(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9]))\\.){3}(?:(?:25[0-5]|(?:[1-9]|1[0-9]|2[0-4])?[0-9])))))))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,5}(?:(?:[0-9a-fA-F]{1,4})))?::)(?:(?:[0-9a-fA-F]{1,4})))|(?:(?:(?:(?:(?:(?:[0-9a-fA-F]{1,4})):){0,6}(?:(?:[0-9a-fA-F]{1,4})))?::))))"
resizeNodeReadyTimeout = 2 * time . Minute
resizeNodeNotReadyTimeout = 2 * time . Minute
2019-10-05 16:20:40 -04:00
// netexec dial commands
// the destination will echo its hostname.
echoHostname = "hostname"
2016-08-24 21:08:12 -04:00
)
2019-07-01 12:55:04 -04:00
// NetexecImageName is the image name for agnhost, as returned by
// imageutils.GetE2EImage(imageutils.Agnhost).
var NetexecImageName = imageutils . GetE2EImage ( imageutils . Agnhost )
2017-08-29 04:32:08 -04:00
2020-10-26 07:01:06 -04:00
// Option is a function used to configure a NetworkingTestConfig object.
// Options are passed to NewNetworkingTestConfig, which calls each of them on
// the new config before running setup.
type Option func ( * NetworkingTestConfig )
// EnableSCTP is an Option that sets SCTPEnabled on the config, so that the
// endpoints listen on SCTP ports.
func EnableSCTP ( config * NetworkingTestConfig ) {
config . SCTPEnabled = true
}
2020-11-11 17:04:33 -05:00
// EnableDualStack is an Option that sets DualStackEnabled on the config, so
// that Dual Stack services are created.
func EnableDualStack ( config * NetworkingTestConfig ) {
config . DualStackEnabled = true
}
2020-10-26 07:01:06 -04:00
// UseHostNetwork is an Option that sets HostNetwork on the config, so that
// the test container runs with HostNetwork=true.
func UseHostNetwork ( config * NetworkingTestConfig ) {
config . HostNetwork = true
}
// EndpointsUseHostNetwork is an Option that sets EndpointsHostNetwork on the
// config, so that the endpoint pods run with HostNetwork=true.
func EndpointsUseHostNetwork ( config * NetworkingTestConfig ) {
config . EndpointsHostNetwork = true
}
2016-08-24 21:08:12 -04:00
// NewNetworkingTestConfig creates and sets up a new test config helper.
2020-10-26 07:01:06 -04:00
func NewNetworkingTestConfig ( f * framework . Framework , setters ... Option ) * NetworkingTestConfig {
// default options
config := & NetworkingTestConfig {
f : f ,
Namespace : f . Namespace . Name ,
}
for _ , setter := range setters {
setter ( config )
}
2019-03-28 20:31:25 -04:00
ginkgo . By ( fmt . Sprintf ( "Performing setup for networking test in namespace %v" , config . Namespace ) )
2016-10-04 17:06:25 -04:00
config . setup ( getServiceSelector ( ) )
2016-08-24 21:08:12 -04:00
return config
}
2019-03-28 20:31:25 -04:00
// NewCoreNetworkingTestConfig creates and sets up a new test config helper for Node E2E.
2019-10-24 21:34:25 -04:00
func NewCoreNetworkingTestConfig ( f * framework . Framework , hostNetwork bool ) * NetworkingTestConfig {
2020-10-26 07:01:06 -04:00
// default options
config := & NetworkingTestConfig {
f : f ,
Namespace : f . Namespace . Name ,
HostNetwork : hostNetwork ,
}
2019-03-28 20:31:25 -04:00
ginkgo . By ( fmt . Sprintf ( "Performing setup for networking test in namespace %v" , config . Namespace ) )
2016-10-04 17:06:25 -04:00
config . setupCore ( getServiceSelector ( ) )
return config
}
func getServiceSelector ( ) map [ string ] string {
2019-03-28 20:31:25 -04:00
ginkgo . By ( "creating a selector" )
2016-10-04 17:06:25 -04:00
selectorName := "selector-" + string ( uuid . NewUUID ( ) )
serviceSelector := map [ string ] string {
selectorName : "true" ,
}
return serviceSelector
}
2016-08-24 21:08:12 -04:00
// NetworkingTestConfig is a convenience class around some utility methods
// for testing kubeproxy/networking/services/endpoints.
type NetworkingTestConfig struct {
2016-10-04 17:06:25 -04:00
// TestContaienrPod is a test pod running the netexec image. It is capable
2016-08-24 21:08:12 -04:00
// of executing tcp/udp requests against ip:port.
2016-11-18 15:55:17 -05:00
TestContainerPod * v1 . Pod
2018-10-08 04:41:06 -04:00
// HostTestContainerPod is a pod running using the hostexec image.
2016-11-18 15:55:17 -05:00
HostTestContainerPod * v1 . Pod
2018-10-08 04:41:06 -04:00
// if the HostTestContainerPod is running with HostNetwork=true.
HostNetwork bool
2020-10-26 07:01:06 -04:00
// if the endpoints Pods are running with HostNetwork=true.
EndpointsHostNetwork bool
2020-02-07 06:28:50 -05:00
// if the test pods are listening on sctp port. We need this as sctp tests
// are marked as disruptive as they may load the sctp module.
SCTPEnabled bool
2020-11-11 17:04:33 -05:00
// DualStackEnabled enables dual stack on services
DualStackEnabled bool
2016-10-04 17:06:25 -04:00
// EndpointPods are the pods belonging to the Service created by this
2016-08-24 21:08:12 -04:00
// test config. Each invocation of `setup` creates a service with
// 1 pod per node running the netexecImage.
2016-11-18 15:55:17 -05:00
EndpointPods [ ] * v1 . Pod
2019-10-24 21:34:25 -04:00
f * framework . Framework
podClient * framework . PodClient
2016-10-04 17:06:25 -04:00
// NodePortService is a Service with Type=NodePort spanning over all
2016-08-24 21:08:12 -04:00
// endpointPods.
2016-11-18 15:55:17 -05:00
NodePortService * v1 . Service
2017-11-05 14:25:55 -05:00
// SessionAffinityService is a Service with SessionAffinity=ClientIP
// spanning over all endpointPods.
SessionAffinityService * v1 . Service
2016-10-04 17:06:25 -04:00
// Nodes is a list of nodes in the cluster.
2016-11-18 15:55:17 -05:00
Nodes [ ] v1 . Node
2016-10-04 17:06:25 -04:00
// MaxTries is the number of retries tolerated for tests run against
2016-08-24 21:08:12 -04:00
// endpoints and services created by this config.
2016-10-04 17:06:25 -04:00
MaxTries int
2020-11-11 17:04:33 -05:00
// The ClusterIP of the Service created by this test config.
2016-10-04 17:06:25 -04:00
ClusterIP string
2020-11-11 17:04:33 -05:00
// The SecondaryClusterIP of the Service created by this test config.
SecondaryClusterIP string
2020-11-16 17:11:11 -05:00
// NodeIP it's an ExternalIP if the node has one,
// or an InternalIP if not, for use in nodePort testing.
2016-10-04 17:06:25 -04:00
NodeIP string
2020-11-16 17:11:11 -05:00
// SecondaryNodeIP it's an ExternalIP of the secondary IP family if the node has one,
// or an InternalIP if not, for usein nodePort testing.
2020-11-11 17:04:33 -05:00
SecondaryNodeIP string
2020-02-07 06:28:50 -05:00
// The http/udp/sctp nodePorts of the Service.
2019-03-28 20:31:25 -04:00
NodeHTTPPort int
NodeUDPPort int
2020-02-07 06:28:50 -05:00
NodeSCTPPort int
2016-08-24 21:08:12 -04:00
// The kubernetes namespace within which all resources for this
// config are created
2016-10-04 17:06:25 -04:00
Namespace string
2016-08-24 21:08:12 -04:00
}
2020-05-29 00:00:40 -04:00
// NetexecDialResponse is the JSON document returned by the `netexec`
// subcommand of `agnhost` in reply to a dial request.
type NetexecDialResponse struct {
	// Responses is decoded from the "responses" JSON key.
	Responses []string `json:"responses"`
	// Errors is decoded from the "errors" JSON key.
	Errors []string `json:"errors"`
}
2020-09-23 10:32:51 -04:00
// DialFromEndpointContainer executes a curl via kubectl exec in an endpoint container. Returns an error to be handled by the caller.
2020-09-23 10:09:38 -04:00
func ( config * NetworkingTestConfig ) DialFromEndpointContainer ( protocol , targetIP string , targetPort , maxTries , minTries int , expectedEps sets . String ) error {
return config . DialFromContainer ( protocol , echoHostname , config . EndpointPods [ 0 ] . Status . PodIP , targetIP , EndpointHTTPPort , targetPort , maxTries , minTries , expectedEps )
2016-08-24 21:08:12 -04:00
}
2020-09-23 10:32:51 -04:00
// DialFromTestContainer executes a curl via kubectl exec in a test container. Returns an error to be handled by the caller.
2020-09-23 10:09:38 -04:00
func ( config * NetworkingTestConfig ) DialFromTestContainer ( protocol , targetIP string , targetPort , maxTries , minTries int , expectedEps sets . String ) error {
return config . DialFromContainer ( protocol , echoHostname , config . TestContainerPod . Status . PodIP , targetIP , testContainerHTTPPort , targetPort , maxTries , minTries , expectedEps )
2019-10-05 16:20:40 -04:00
}
2020-09-23 10:32:51 -04:00
// DialEchoFromTestContainer executes a curl via kubectl exec in a test container. The response is expected to match the echoMessage, Returns an error to be handled by the caller.
2020-09-23 10:09:38 -04:00
func ( config * NetworkingTestConfig ) DialEchoFromTestContainer ( protocol , targetIP string , targetPort , maxTries , minTries int , echoMessage string ) error {
2019-10-05 16:20:40 -04:00
expectedResponse := sets . NewString ( )
expectedResponse . Insert ( echoMessage )
var dialCommand string
// NOTE(claudiub): netexec /dialCommand will send a request to the given targetIP and targetPort as follows:
// for HTTP: it will send a request to: http://targetIP:targetPort/dialCommand
// for UDP: it will send targetCommand as a message. The consumer receives the data message and looks for
// a few starting strings, including echo, and treats it accordingly.
if protocol == "http" {
dialCommand = fmt . Sprintf ( "echo?msg=%s" , echoMessage )
} else {
dialCommand = fmt . Sprintf ( "echo%%20%s" , echoMessage )
}
2020-09-23 10:09:38 -04:00
return config . DialFromContainer ( protocol , dialCommand , config . TestContainerPod . Status . PodIP , targetIP , testContainerHTTPPort , targetPort , maxTries , minTries , expectedResponse )
2016-08-24 21:08:12 -04:00
}
// diagnoseMissingEndpoints prints debug information about the endpoints that
// are NOT in the given list of foundEndpoints. These are the endpoints we
// expected a response from.
func ( config * NetworkingTestConfig ) diagnoseMissingEndpoints ( foundEndpoints sets . String ) {
2016-10-04 17:06:25 -04:00
for _ , e := range config . EndpointPods {
2016-08-24 21:08:12 -04:00
if foundEndpoints . Has ( e . Name ) {
continue
}
2019-10-24 21:34:25 -04:00
framework . Logf ( "\nOutput of kubectl describe pod %v/%v:\n" , e . Namespace , e . Name )
desc , _ := framework . RunKubectl (
2019-11-21 22:32:00 -05:00
e . Namespace , "describe" , "pod" , e . Name , fmt . Sprintf ( "--namespace=%v" , e . Namespace ) )
2019-10-24 21:34:25 -04:00
framework . Logf ( desc )
2016-08-24 21:08:12 -04:00
}
}
2016-10-04 17:06:25 -04:00
// EndpointHostnames returns a set of hostnames for existing endpoints.
func ( config * NetworkingTestConfig ) EndpointHostnames ( ) sets . String {
2016-08-24 21:08:12 -04:00
expectedEps := sets . NewString ( )
2016-10-04 17:06:25 -04:00
for _ , p := range config . EndpointPods {
2020-10-26 07:01:06 -04:00
if config . EndpointsHostNetwork {
expectedEps . Insert ( p . Spec . NodeSelector [ "kubernetes.io/hostname" ] )
} else {
expectedEps . Insert ( p . Name )
}
2016-08-24 21:08:12 -04:00
}
return expectedEps
}
2020-05-29 00:00:40 -04:00
// makeCURLDialCommand returns the curl command line that requests the /dial
// endpoint served at ipPort, asking it to send dialCmd over protocol to
// targetIP:targetPort with a single try. The arguments are inserted verbatim,
// so dialCmd must already be formatted for use inside a URL query.
func makeCURLDialCommand(ipPort, dialCmd, protocol, targetIP string, targetPort int) string {
	query := fmt.Sprintf("request=%s&protocol=%s&host=%s&port=%d&tries=1", dialCmd, protocol, targetIP, targetPort)
	// The current versions of curl included in CentOS and RHEL distros
	// misinterpret square brackets around IPv6 as globbing, so use the -g
	// argument to disable globbing to handle the IPv6 case.
	return fmt.Sprintf("curl -g -q -s 'http://%s/dial?%s'", ipPort, query)
}
2019-03-28 20:31:25 -04:00
// DialFromContainer executes a curl via kubectl exec in a test container,
2016-08-24 21:08:12 -04:00
// which might then translate to a tcp or udp request based on the protocol
// argument in the url.
// - minTries is the minimum number of curl attempts required before declaring
// success. Set to 0 if you'd like to return as soon as all endpoints respond
// at least once.
// - maxTries is the maximum number of curl attempts. If this many attempts pass
// and we don't see all expected endpoints, the test fails.
2019-10-05 16:20:40 -04:00
// - targetIP is the source Pod IP that will dial the given dialCommand using the given protocol.
// - dialCommand is the command that the targetIP will send to the targetIP using the given protocol.
// the dialCommand should be formatted properly for the protocol (http: URL path+parameters,
// udp: command%20parameters, where parameters are optional)
// - expectedResponses is the unordered set of responses to wait for. The responses are based on
// the dialCommand; for example, for the dialCommand "hostname", the expectedResponses
// should contain the hostnames reported by each pod in the service through /hostName.
2016-08-24 21:08:12 -04:00
// maxTries == minTries will confirm that we see the expected endpoints and no
// more for maxTries. Use this if you want to eg: fail a readiness check on a
// pod and confirm it doesn't show up as an endpoint.
2020-09-23 10:09:38 -04:00
// Returns nil if no error, or error message if failed after trying maxTries.
func ( config * NetworkingTestConfig ) DialFromContainer ( protocol , dialCommand , containerIP , targetIP string , containerHTTPPort , targetPort , maxTries , minTries int , expectedResponses sets . String ) error {
2019-03-28 20:31:25 -04:00
ipPort := net . JoinHostPort ( containerIP , strconv . Itoa ( containerHTTPPort ) )
2020-05-29 00:00:40 -04:00
cmd := makeCURLDialCommand ( ipPort , dialCommand , protocol , targetIP , targetPort )
2016-08-24 21:08:12 -04:00
2019-10-05 16:20:40 -04:00
responses := sets . NewString ( )
2016-08-24 21:08:12 -04:00
for i := 0 ; i < maxTries ; i ++ {
2020-05-29 00:00:40 -04:00
resp , err := config . GetResponseFromContainer ( protocol , dialCommand , containerIP , targetIP , containerHTTPPort , targetPort )
2016-10-04 17:04:13 -04:00
if err != nil {
2020-06-15 02:42:17 -04:00
// A failure to kubectl exec counts as a try, not a hard fail.
// Also note that we will keep failing for maxTries in tests where
// we confirm unreachability.
2020-05-29 00:00:40 -04:00
framework . Logf ( "GetResponseFromContainer: %s" , err )
continue
}
for _ , response := range resp . Responses {
trimmed := strings . TrimSpace ( response )
if trimmed != "" {
responses . Insert ( trimmed )
2016-10-04 17:04:13 -04:00
}
2016-08-24 21:08:12 -04:00
}
2019-10-05 16:20:40 -04:00
framework . Logf ( "Waiting for responses: %v" , expectedResponses . Difference ( responses ) )
2016-08-24 21:08:12 -04:00
// Check against i+1 so we exit if minTries == maxTries.
2019-10-05 16:20:40 -04:00
if ( responses . Equal ( expectedResponses ) || responses . Len ( ) == 0 && expectedResponses . Len ( ) == 0 ) && i + 1 >= minTries {
2020-09-23 10:09:38 -04:00
framework . Logf ( "reached %v after %v/%v tries" , targetIP , i , maxTries )
return nil
2016-08-24 21:08:12 -04:00
}
2016-11-04 23:27:39 -04:00
// TODO: get rid of this delay #36281
time . Sleep ( hitEndpointRetryDelay )
2016-08-24 21:08:12 -04:00
}
2019-10-05 16:20:40 -04:00
if dialCommand == echoHostname {
config . diagnoseMissingEndpoints ( responses )
}
2020-09-23 10:09:38 -04:00
returnMsg := fmt . Errorf ( "did not find expected responses... \nTries %d\nCommand %v\nretrieved %v\nexpected %v" , maxTries , cmd , responses , expectedResponses )
framework . Logf ( "encountered error during dial (%v)" , returnMsg )
return returnMsg
2016-08-24 21:08:12 -04:00
}
2019-03-28 20:31:25 -04:00
// GetEndpointsFromTestContainer executes a curl via kubectl exec in a test container.
2017-10-11 22:06:03 -04:00
func ( config * NetworkingTestConfig ) GetEndpointsFromTestContainer ( protocol , targetIP string , targetPort , tries int ) ( sets . String , error ) {
2019-03-28 20:31:25 -04:00
return config . GetEndpointsFromContainer ( protocol , config . TestContainerPod . Status . PodIP , targetIP , testContainerHTTPPort , targetPort , tries )
2017-08-10 05:06:14 -04:00
}
2017-10-11 22:06:03 -04:00
// GetEndpointsFromContainer executes a curl via kubectl exec in a test container,
// which might then translate to a tcp or udp request based on the protocol argument
2017-10-27 04:45:53 -04:00
// in the url. It returns all different endpoints from multiple retries.
// - tries is the number of curl attempts. If this many attempts pass and
2017-10-11 22:06:03 -04:00
// we don't see any endpoints, the test fails.
2019-03-28 20:31:25 -04:00
func ( config * NetworkingTestConfig ) GetEndpointsFromContainer ( protocol , containerIP , targetIP string , containerHTTPPort , targetPort , tries int ) ( sets . String , error ) {
ipPort := net . JoinHostPort ( containerIP , strconv . Itoa ( containerHTTPPort ) )
2020-05-29 00:00:40 -04:00
cmd := makeCURLDialCommand ( ipPort , "hostName" , protocol , targetIP , targetPort )
2017-08-10 05:06:14 -04:00
eps := sets . NewString ( )
2017-10-11 22:06:03 -04:00
for i := 0 ; i < tries ; i ++ {
2018-10-08 04:41:06 -04:00
stdout , stderr , err := config . f . ExecShellInPodWithFullOutput ( config . TestContainerPod . Name , cmd )
2017-08-10 05:06:14 -04:00
if err != nil {
// A failure to kubectl exec counts as a try, not a hard fail.
// Also note that we will keep failing for maxTries in tests where
// we confirm unreachability.
2019-10-24 21:34:25 -04:00
framework . Logf ( "Failed to execute %q: %v, stdout: %q, stderr: %q" , cmd , err , stdout , stderr )
2017-08-10 05:06:14 -04:00
} else {
2020-10-01 07:40:43 -04:00
podInfo := fmt . Sprintf ( "name: %v, namespace: %v, hostIp: %v, podIp: %v, conditions: %v" , config . TestContainerPod . Name , config . TestContainerPod . Namespace , config . TestContainerPod . Status . HostIP , config . TestContainerPod . Status . PodIP , config . TestContainerPod . Status . Conditions )
framework . Logf ( "Tries: %d, in try: %d, stdout: %v, stderr: %v, command run in Pod { %#v }" , tries , i , stdout , stderr , podInfo )
2020-05-29 00:00:40 -04:00
var output NetexecDialResponse
2017-08-10 05:06:14 -04:00
if err := json . Unmarshal ( [ ] byte ( stdout ) , & output ) ; err != nil {
2019-10-24 21:34:25 -04:00
framework . Logf ( "WARNING: Failed to unmarshal curl response. Cmd %v run in %v, output: %s, err: %v" ,
2020-06-18 10:10:10 -04:00
cmd , config . TestContainerPod . Name , stdout , err )
2017-08-10 05:06:14 -04:00
continue
}
2020-05-29 00:00:40 -04:00
for _ , hostName := range output . Responses {
2017-08-10 05:06:14 -04:00
trimmed := strings . TrimSpace ( hostName )
if trimmed != "" {
eps . Insert ( trimmed )
}
}
2017-10-11 22:06:03 -04:00
// TODO: get rid of this delay #36281
time . Sleep ( hitEndpointRetryDelay )
2017-08-10 05:06:14 -04:00
}
}
2017-10-27 04:45:53 -04:00
return eps , nil
2017-08-10 05:06:14 -04:00
}
2020-05-29 00:00:40 -04:00
// GetResponseFromContainer executes a single curl via kubectl exec in the
// test container pod. The curl asks the netexec server at
// containerIP:containerHTTPPort to send dialCommand over protocol to
// targetIP:targetPort, and the JSON reply is decoded into a
// NetexecDialResponse.
//
// On failure the zero NetexecDialResponse is returned together with an error
// that wraps the underlying exec or decoding error, so callers can inspect it
// with errors.Is / errors.As.
func (config *NetworkingTestConfig) GetResponseFromContainer(protocol, dialCommand, containerIP, targetIP string, containerHTTPPort, targetPort int) (NetexecDialResponse, error) {
	ipPort := net.JoinHostPort(containerIP, strconv.Itoa(containerHTTPPort))
	cmd := makeCURLDialCommand(ipPort, dialCommand, protocol, targetIP, targetPort)

	stdout, stderr, err := config.f.ExecShellInPodWithFullOutput(config.TestContainerPod.Name, cmd)
	if err != nil {
		return NetexecDialResponse{}, fmt.Errorf("failed to execute %q: %w, stdout: %q, stderr: %q", cmd, err, stdout, stderr)
	}

	var output NetexecDialResponse
	if err := json.Unmarshal([]byte(stdout), &output); err != nil {
		return NetexecDialResponse{}, fmt.Errorf("failed to unmarshal curl response. Cmd %v run in %v, output: %s, err: %w",
			cmd, config.TestContainerPod.Name, stdout, err)
	}
	return output, nil
}
// GetResponseFromTestContainer calls GetResponseFromContainer with the test
// container pod's IP and testContainerHTTPPort as the dialing netexec server.
func (config *NetworkingTestConfig) GetResponseFromTestContainer(protocol, dialCommand, targetIP string, targetPort int) (NetexecDialResponse, error) {
	sourceIP := config.TestContainerPod.Status.PodIP
	return config.GetResponseFromContainer(protocol, dialCommand, sourceIP, targetIP, testContainerHTTPPort, targetPort)
}
// GetHTTPCodeFromTestContainer executes a curl against
// http://targetIP:targetPort<path> via kubectl exec in the test container pod
// and returns the HTTP status code reported by curl.
//
// targetIP may be an IPv4 address, a hostname, or an IPv6 literal with or
// without surrounding square brackets; IPv6 literals are bracketed in the URL.
//
// An error is returned when the command could not be executed and produced no
// output, or when curl's output cannot be parsed as an integer.
func (config *NetworkingTestConfig) GetHTTPCodeFromTestContainer(path, targetIP string, targetPort int) (int, error) {
	// JoinHostPort adds the brackets an IPv6 literal needs in a URL. Strip any
	// brackets the caller already supplied so they are not doubled.
	host := strings.TrimSuffix(strings.TrimPrefix(targetIP, "["), "]")
	hostPort := net.JoinHostPort(host, strconv.Itoa(targetPort))
	// -g disables curl globbing, which would otherwise misinterpret the
	// square brackets around an IPv6 address.
	cmd := fmt.Sprintf("curl -g -q -s -o /dev/null -w %%{http_code} http://%s%s",
		hostPort,
		path)
	stdout, stderr, err := config.f.ExecShellInPodWithFullOutput(config.TestContainerPod.Name, cmd)
	// We only care about the status code reported by curl,
	// and want to return any other errors, such as cannot execute command in the Pod.
	// If curl failed to connect to host, it would exit with code 7, which makes `ExecShellInPodWithFullOutput`
	// return a non-nil error and output "000" to stdout.
	if err != nil && len(stdout) == 0 {
		return 0, fmt.Errorf("failed to execute %q: %v, stderr: %q", cmd, err, stderr)
	}
	// Tolerate stray surrounding whitespace in the captured output.
	code, err := strconv.Atoi(strings.TrimSpace(stdout))
	if err != nil {
		return 0, fmt.Errorf("failed to parse status code returned by healthz endpoint: %w, code: %s", err, stdout)
	}
	return code, nil
}
2020-10-05 05:15:30 -04:00
// DialFromNode executes a tcp/udp curl/nc request based on protocol via kubectl exec
2016-08-24 21:08:12 -04:00
// in a test container running with host networking.
2020-10-05 05:15:30 -04:00
// - minTries is the minimum number of curl/nc attempts required before declaring
2020-10-04 15:20:03 -04:00
// success. If 0, then we return as soon as all endpoints succeed.
// - There is no logical change to test results if faillures happen AFTER endpoints have succeeded,
2020-10-05 05:15:30 -04:00
// hence over-padding minTries will NOT reverse a successful result and is thus not very useful yet
2020-10-04 15:20:03 -04:00
// (See the TODO about checking probability, which isnt implemented yet).
// - maxTries is the maximum number of curl/echo attempts before an error is returned. The
// smaller this number is, the less 'slack' there is for declaring success.
2021-03-30 09:28:23 -04:00
// - if maxTries < expectedEps, this test is guaranteed to return an error, because all endpoints won't be hit.
2020-10-04 15:20:03 -04:00
// - maxTries == minTries will return as soon as all endpoints succeed (or fail once maxTries is reached without
// success on all endpoints).
// In general its prudent to have a high enough level of minTries to guarantee that all pods get a fair chance at receiving traffic.
func ( config * NetworkingTestConfig ) DialFromNode ( protocol , targetIP string , targetPort , maxTries , minTries int , expectedEps sets . String ) error {
2016-08-24 21:08:12 -04:00
var cmd string
if protocol == "udp" {
2019-01-11 06:53:28 -05:00
cmd = fmt . Sprintf ( "echo hostName | nc -w 1 -u %s %d" , targetIP , targetPort )
2016-08-24 21:08:12 -04:00
} else {
2017-09-19 17:29:29 -04:00
ipPort := net . JoinHostPort ( targetIP , strconv . Itoa ( targetPort ) )
// The current versions of curl included in CentOS and RHEL distros
// misinterpret square brackets around IPv6 as globbing, so use the -g
// argument to disable globbing to handle the IPv6 case.
2018-10-08 18:17:10 -04:00
cmd = fmt . Sprintf ( "curl -g -q -s --max-time 15 --connect-timeout 1 http://%s/hostName" , ipPort )
2016-08-24 21:08:12 -04:00
}
// TODO: This simply tells us that we can reach the endpoints. Check that
// the probability of hitting a specific endpoint is roughly the same as
// hitting any other.
eps := sets . NewString ( )
filterCmd := fmt . Sprintf ( "%s | grep -v '^\\s*$'" , cmd )
2020-10-05 05:15:30 -04:00
framework . Logf ( "Going to poll %v on port %v at least %v times, with a maximum of %v tries before failing" , targetIP , targetPort , minTries , maxTries )
2016-08-24 21:08:12 -04:00
for i := 0 ; i < maxTries ; i ++ {
2016-10-04 17:04:13 -04:00
stdout , stderr , err := config . f . ExecShellInPodWithFullOutput ( config . HostTestContainerPod . Name , filterCmd )
if err != nil || len ( stderr ) > 0 {
// A failure to exec command counts as a try, not a hard fail.
// Also note that we will keep failing for maxTries in tests where
// we confirm unreachability.
2019-10-24 21:34:25 -04:00
framework . Logf ( "Failed to execute %q: %v, stdout: %q, stderr: %q" , filterCmd , err , stdout , stderr )
2016-10-04 17:04:13 -04:00
} else {
trimmed := strings . TrimSpace ( stdout )
if trimmed != "" {
eps . Insert ( trimmed )
}
}
2016-08-24 21:08:12 -04:00
// Check against i+1 so we exit if minTries == maxTries.
2017-02-22 12:13:19 -05:00
if eps . Equal ( expectedEps ) && i + 1 >= minTries {
2020-10-05 05:15:30 -04:00
framework . Logf ( "Found all %d expected endpoints: %+v" , eps . Len ( ) , eps . List ( ) )
2020-10-04 15:20:03 -04:00
return nil
2016-08-24 21:08:12 -04:00
}
2017-02-22 12:13:19 -05:00
2019-10-24 21:34:25 -04:00
framework . Logf ( "Waiting for %+v endpoints (expected=%+v, actual=%+v)" , expectedEps . Difference ( eps ) . List ( ) , expectedEps . List ( ) , eps . List ( ) )
2017-02-22 12:13:19 -05:00
2016-11-04 23:27:39 -04:00
// TODO: get rid of this delay #36281
time . Sleep ( hitEndpointRetryDelay )
2016-08-24 21:08:12 -04:00
}
config . diagnoseMissingEndpoints ( eps )
2020-10-05 05:15:30 -04:00
return fmt . Errorf ( "failed to find expected endpoints, \ntries %d\nCommand %v\nretrieved %v\nexpected %v" , maxTries , cmd , eps , expectedEps )
2016-08-24 21:08:12 -04:00
}
2016-10-04 17:06:25 -04:00
// GetSelfURL executes a curl against the given path via kubectl exec into a
2016-08-24 21:08:12 -04:00
// test container running with host networking, and fails if the output
// doesn't match the expected string.
2017-05-05 17:44:25 -04:00
func ( config * NetworkingTestConfig ) GetSelfURL ( port int32 , path string , expected string ) {
cmd := fmt . Sprintf ( "curl -i -q -s --connect-timeout 1 http://localhost:%d%s" , port , path )
2019-03-28 20:31:25 -04:00
ginkgo . By ( fmt . Sprintf ( "Getting kube-proxy self URL %s" , path ) )
2017-06-01 22:51:55 -04:00
config . executeCurlCmd ( cmd , expected )
}
2019-03-28 20:31:25 -04:00
// GetSelfURLStatusCode executes a curl against the given path via kubectl exec into a
2017-06-01 22:51:55 -04:00
// test container running with host networking, and fails if the returned status
// code doesn't match the expected string.
func ( config * NetworkingTestConfig ) GetSelfURLStatusCode ( port int32 , path string , expected string ) {
// check status code
cmd := fmt . Sprintf ( "curl -o /dev/null -i -q -s -w %%{http_code} --connect-timeout 1 http://localhost:%d%s" , port , path )
2019-03-28 20:31:25 -04:00
ginkgo . By ( fmt . Sprintf ( "Checking status code against http://localhost:%d%s" , port , path ) )
2017-06-01 22:51:55 -04:00
config . executeCurlCmd ( cmd , expected )
}
2016-11-16 16:51:48 -05:00
2017-06-01 22:51:55 -04:00
func ( config * NetworkingTestConfig ) executeCurlCmd ( cmd string , expected string ) {
2016-11-16 16:51:48 -05:00
// These are arbitrary timeouts. The curl command should pass on first try,
2017-06-01 22:51:55 -04:00
// unless remote server is starved/bootstrapping/restarting etc.
2016-11-16 16:51:48 -05:00
const retryInterval = 1 * time . Second
const retryTimeout = 30 * time . Second
podName := config . HostTestContainerPod . Name
var msg string
if pollErr := wait . PollImmediate ( retryInterval , retryTimeout , func ( ) ( bool , error ) {
2019-10-24 21:34:25 -04:00
stdout , err := framework . RunHostCmd ( config . Namespace , podName , cmd )
2016-11-16 16:51:48 -05:00
if err != nil {
msg = fmt . Sprintf ( "failed executing cmd %v in %v/%v: %v" , cmd , config . Namespace , podName , err )
2019-10-24 21:34:25 -04:00
framework . Logf ( msg )
2016-11-16 16:51:48 -05:00
return false , nil
}
if ! strings . Contains ( stdout , expected ) {
msg = fmt . Sprintf ( "successfully executed %v in %v/%v, but output '%v' doesn't contain expected string '%v'" , cmd , config . Namespace , podName , stdout , expected )
2019-10-24 21:34:25 -04:00
framework . Logf ( msg )
2016-11-16 16:51:48 -05:00
return false , nil
}
return true , nil
} ) ; pollErr != nil {
2019-10-24 21:34:25 -04:00
framework . Logf ( "\nOutput of kubectl describe pod %v/%v:\n" , config . Namespace , podName )
desc , _ := framework . RunKubectl (
2019-11-21 22:32:00 -05:00
config . Namespace , "describe" , "pod" , podName , fmt . Sprintf ( "--namespace=%v" , config . Namespace ) )
2019-10-24 21:34:25 -04:00
framework . Logf ( "%s" , desc )
framework . Failf ( "Timed out in %v: %v" , retryTimeout , msg )
2016-11-16 16:51:48 -05:00
}
2016-08-24 21:08:12 -04:00
}
2017-06-07 16:51:24 -04:00
func ( config * NetworkingTestConfig ) createNetShellPodSpec ( podName , hostname string ) * v1 . Pod {
2021-02-08 08:54:06 -05:00
netexecArgs := [ ] string {
"netexec" ,
fmt . Sprintf ( "--http-port=%d" , EndpointHTTPPort ) ,
fmt . Sprintf ( "--udp-port=%d" , EndpointUDPPort ) ,
}
// In case of hostnetwork endpoints, we want to bind the udp listener to specific ip addresses.
// In order to cover legacy AND dualstack, we pass both the host ip and the two pod ips. Agnhost
// removes duplicates and so this will listen on both addresses (or on the single existing one).
if config . EndpointsHostNetwork {
netexecArgs = append ( netexecArgs , "--udp-listen-addresses=$(HOST_IP),$(POD_IPS)" )
}
2016-11-18 15:55:17 -05:00
probe := & v1 . Probe {
2016-08-24 21:08:12 -04:00
InitialDelaySeconds : 10 ,
TimeoutSeconds : 30 ,
PeriodSeconds : 10 ,
SuccessThreshold : 1 ,
FailureThreshold : 3 ,
2021-10-29 16:15:11 -04:00
ProbeHandler : v1 . ProbeHandler {
2016-11-18 15:55:17 -05:00
HTTPGet : & v1 . HTTPGetAction {
2016-08-24 21:08:12 -04:00
Path : "/healthz" ,
2019-03-28 20:31:25 -04:00
Port : intstr . IntOrString { IntVal : EndpointHTTPPort } ,
2016-08-24 21:08:12 -04:00
} ,
} ,
}
2016-11-18 15:55:17 -05:00
pod := & v1 . Pod {
2016-12-03 13:57:26 -05:00
TypeMeta : metav1 . TypeMeta {
2016-08-24 21:08:12 -04:00
Kind : "Pod" ,
2018-05-01 10:54:37 -04:00
APIVersion : "v1" ,
2016-08-24 21:08:12 -04:00
} ,
2017-01-16 22:38:19 -05:00
ObjectMeta : metav1 . ObjectMeta {
2016-08-24 21:08:12 -04:00
Name : podName ,
2016-10-04 17:06:25 -04:00
Namespace : config . Namespace ,
2016-08-24 21:08:12 -04:00
} ,
2016-11-18 15:55:17 -05:00
Spec : v1 . PodSpec {
Containers : [ ] v1 . Container {
2016-08-24 21:08:12 -04:00
{
Name : "webserver" ,
2019-07-01 12:55:04 -04:00
Image : NetexecImageName ,
2016-11-18 15:55:17 -05:00
ImagePullPolicy : v1 . PullIfNotPresent ,
2021-02-08 08:54:06 -05:00
Args : netexecArgs ,
2016-11-18 15:55:17 -05:00
Ports : [ ] v1 . ContainerPort {
2016-08-24 21:08:12 -04:00
{
Name : "http" ,
2019-03-28 20:31:25 -04:00
ContainerPort : EndpointHTTPPort ,
2016-08-24 21:08:12 -04:00
} ,
{
Name : "udp" ,
2019-03-28 20:31:25 -04:00
ContainerPort : EndpointUDPPort ,
2016-11-18 15:55:17 -05:00
Protocol : v1 . ProtocolUDP ,
2016-08-24 21:08:12 -04:00
} ,
} ,
LivenessProbe : probe ,
ReadinessProbe : probe ,
} ,
} ,
2016-10-31 18:50:11 -04:00
NodeSelector : map [ string ] string {
2017-06-07 16:51:24 -04:00
"kubernetes.io/hostname" : hostname ,
2016-10-31 18:50:11 -04:00
} ,
2016-08-24 21:08:12 -04:00
} ,
}
2020-02-07 06:28:50 -05:00
// we want sctp to be optional as it will load the sctp kernel module
if config . SCTPEnabled {
pod . Spec . Containers [ 0 ] . Args = append ( pod . Spec . Containers [ 0 ] . Args , fmt . Sprintf ( "--sctp-port=%d" , EndpointSCTPPort ) )
pod . Spec . Containers [ 0 ] . Ports = append ( pod . Spec . Containers [ 0 ] . Ports , v1 . ContainerPort {
Name : "sctp" ,
ContainerPort : EndpointSCTPPort ,
Protocol : v1 . ProtocolSCTP ,
} )
}
2021-02-08 08:54:06 -05:00
if config . EndpointsHostNetwork {
pod . Spec . Containers [ 0 ] . Env = [ ] v1 . EnvVar {
{
Name : "HOST_IP" ,
ValueFrom : & v1 . EnvVarSource {
FieldRef : & v1 . ObjectFieldSelector {
FieldPath : "status.hostIP" ,
} ,
} ,
} ,
{
Name : "POD_IPS" ,
ValueFrom : & v1 . EnvVarSource {
FieldRef : & v1 . ObjectFieldSelector {
FieldPath : "status.podIPs" ,
} ,
} ,
} ,
}
}
2016-08-24 21:08:12 -04:00
return pod
}
2016-11-18 15:55:17 -05:00
func ( config * NetworkingTestConfig ) createTestPodSpec ( ) * v1 . Pod {
pod := & v1 . Pod {
2016-12-03 13:57:26 -05:00
TypeMeta : metav1 . TypeMeta {
2016-08-24 21:08:12 -04:00
Kind : "Pod" ,
2018-05-01 10:54:37 -04:00
APIVersion : "v1" ,
2016-08-24 21:08:12 -04:00
} ,
2017-01-16 22:38:19 -05:00
ObjectMeta : metav1 . ObjectMeta {
2016-08-24 21:08:12 -04:00
Name : testPodName ,
2016-10-04 17:06:25 -04:00
Namespace : config . Namespace ,
2016-08-24 21:08:12 -04:00
} ,
2016-11-18 15:55:17 -05:00
Spec : v1 . PodSpec {
Containers : [ ] v1 . Container {
2016-08-24 21:08:12 -04:00
{
Name : "webserver" ,
2019-07-01 12:55:04 -04:00
Image : NetexecImageName ,
2016-11-18 15:55:17 -05:00
ImagePullPolicy : v1 . PullIfNotPresent ,
2019-05-20 22:40:25 -04:00
Args : [ ] string {
"netexec" ,
2020-09-15 11:07:06 -04:00
fmt . Sprintf ( "--http-port=%d" , testContainerHTTPPort ) ,
2016-08-24 21:08:12 -04:00
} ,
2016-11-18 15:55:17 -05:00
Ports : [ ] v1 . ContainerPort {
2016-08-24 21:08:12 -04:00
{
Name : "http" ,
2019-03-28 20:31:25 -04:00
ContainerPort : testContainerHTTPPort ,
2016-08-24 21:08:12 -04:00
} ,
} ,
} ,
} ,
} ,
}
return pod
}
2017-11-05 14:25:55 -05:00
func ( config * NetworkingTestConfig ) createNodePortServiceSpec ( svcName string , selector map [ string ] string , enableSessionAffinity bool ) * v1 . Service {
sessionAffinity := v1 . ServiceAffinityNone
if enableSessionAffinity {
sessionAffinity = v1 . ServiceAffinityClientIP
}
2020-02-07 06:28:50 -05:00
res := & v1 . Service {
2017-01-16 22:38:19 -05:00
ObjectMeta : metav1 . ObjectMeta {
2017-11-05 14:25:55 -05:00
Name : svcName ,
2016-08-24 21:08:12 -04:00
} ,
2016-11-18 15:55:17 -05:00
Spec : v1 . ServiceSpec {
Type : v1 . ServiceTypeNodePort ,
Ports : [ ] v1 . ServicePort {
2019-03-28 20:31:25 -04:00
{ Port : ClusterHTTPPort , Name : "http" , Protocol : v1 . ProtocolTCP , TargetPort : intstr . FromInt ( EndpointHTTPPort ) } ,
{ Port : ClusterUDPPort , Name : "udp" , Protocol : v1 . ProtocolUDP , TargetPort : intstr . FromInt ( EndpointUDPPort ) } ,
2016-08-24 21:08:12 -04:00
} ,
2017-11-05 14:25:55 -05:00
Selector : selector ,
SessionAffinity : sessionAffinity ,
2016-08-24 21:08:12 -04:00
} ,
}
2020-02-07 06:28:50 -05:00
if config . SCTPEnabled {
res . Spec . Ports = append ( res . Spec . Ports , v1 . ServicePort { Port : ClusterSCTPPort , Name : "sctp" , Protocol : v1 . ProtocolSCTP , TargetPort : intstr . FromInt ( EndpointSCTPPort ) } )
}
2020-11-11 17:04:33 -05:00
if config . DualStackEnabled {
requireDual := v1 . IPFamilyPolicyRequireDualStack
res . Spec . IPFamilyPolicy = & requireDual
}
2020-02-07 06:28:50 -05:00
return res
2017-11-05 14:25:55 -05:00
}
func ( config * NetworkingTestConfig ) createNodePortService ( selector map [ string ] string ) {
2020-08-25 14:27:42 -04:00
config . NodePortService = config . CreateService ( config . createNodePortServiceSpec ( nodePortServiceName , selector , false ) )
2017-11-05 14:25:55 -05:00
}
func ( config * NetworkingTestConfig ) createSessionAffinityService ( selector map [ string ] string ) {
2020-08-25 14:27:42 -04:00
config . SessionAffinityService = config . CreateService ( config . createNodePortServiceSpec ( sessionAffinityServiceName , selector , true ) )
2016-08-24 21:08:12 -04:00
}
2019-03-28 20:31:25 -04:00
// DeleteNodePortService deletes NodePort service.
2016-10-04 17:06:25 -04:00
func ( config * NetworkingTestConfig ) DeleteNodePortService ( ) {
2020-03-01 12:24:42 -05:00
err := config . getServiceClient ( ) . Delete ( context . TODO ( ) , config . NodePortService . Name , metav1 . DeleteOptions { } )
2019-10-24 21:34:25 -04:00
framework . ExpectNoError ( err , "error while deleting NodePortService. err:%v)" , err )
2016-08-24 21:08:12 -04:00
time . Sleep ( 15 * time . Second ) // wait for kube-proxy to catch up with the service being deleted.
}
func ( config * NetworkingTestConfig ) createTestPods ( ) {
testContainerPod := config . createTestPodSpec ( )
2019-05-07 20:09:50 -04:00
hostTestContainerPod := e2epod . NewExecPodSpec ( config . Namespace , hostTestPodName , config . HostNetwork )
2016-08-24 21:08:12 -04:00
config . createPod ( testContainerPod )
2018-10-08 04:41:06 -04:00
if config . HostNetwork {
config . createPod ( hostTestContainerPod )
}
2016-08-24 21:08:12 -04:00
2020-03-16 18:31:09 -04:00
framework . ExpectNoError ( e2epod . WaitForPodNameRunningInNamespace ( config . f . ClientSet , testContainerPod . Name , config . f . Namespace . Name ) )
2016-08-24 21:08:12 -04:00
var err error
2020-02-07 21:16:47 -05:00
config . TestContainerPod , err = config . getPodClient ( ) . Get ( context . TODO ( ) , testContainerPod . Name , metav1 . GetOptions { } )
2016-08-24 21:08:12 -04:00
if err != nil {
2019-10-24 21:34:25 -04:00
framework . Failf ( "Failed to retrieve %s pod: %v" , testContainerPod . Name , err )
2016-08-24 21:08:12 -04:00
}
2018-10-08 04:41:06 -04:00
if config . HostNetwork {
2020-03-16 18:31:09 -04:00
framework . ExpectNoError ( e2epod . WaitForPodNameRunningInNamespace ( config . f . ClientSet , hostTestContainerPod . Name , config . f . Namespace . Name ) )
2020-02-07 21:16:47 -05:00
config . HostTestContainerPod , err = config . getPodClient ( ) . Get ( context . TODO ( ) , hostTestContainerPod . Name , metav1 . GetOptions { } )
2018-10-08 04:41:06 -04:00
if err != nil {
framework . Failf ( "Failed to retrieve %s pod: %v" , hostTestContainerPod . Name , err )
}
2016-08-24 21:08:12 -04:00
}
}
2020-08-25 14:27:42 -04:00
// CreateService creates the provided service in config.Namespace and returns created service
func ( config * NetworkingTestConfig ) CreateService ( serviceSpec * v1 . Service ) * v1 . Service {
2020-02-08 12:30:21 -05:00
_ , err := config . getServiceClient ( ) . Create ( context . TODO ( ) , serviceSpec , metav1 . CreateOptions { } )
2019-10-24 21:34:25 -04:00
framework . ExpectNoError ( err , fmt . Sprintf ( "Failed to create %s service: %v" , serviceSpec . Name , err ) )
2016-08-24 21:08:12 -04:00
2020-04-07 04:11:30 -04:00
err = WaitForService ( config . f . ClientSet , config . Namespace , serviceSpec . Name , true , 5 * time . Second , 45 * time . Second )
2019-10-24 21:34:25 -04:00
framework . ExpectNoError ( err , fmt . Sprintf ( "error while waiting for service:%s err: %v" , serviceSpec . Name , err ) )
2016-08-24 21:08:12 -04:00
2020-02-07 21:16:47 -05:00
createdService , err := config . getServiceClient ( ) . Get ( context . TODO ( ) , serviceSpec . Name , metav1 . GetOptions { } )
2019-10-24 21:34:25 -04:00
framework . ExpectNoError ( err , fmt . Sprintf ( "Failed to create %s service: %v" , serviceSpec . Name , err ) )
2016-08-24 21:08:12 -04:00
return createdService
}
2016-10-04 17:06:25 -04:00
// setupCore sets up the pods and core test config
// mainly for simplified node e2e setup
func ( config * NetworkingTestConfig ) setupCore ( selector map [ string ] string ) {
2019-03-28 20:31:25 -04:00
ginkgo . By ( "Creating the service pods in kubernetes" )
2016-10-04 17:06:25 -04:00
podName := "netserver"
config . EndpointPods = config . createNetProxyPods ( podName , selector )
2019-03-28 20:31:25 -04:00
ginkgo . By ( "Creating test pods" )
2016-10-04 17:06:25 -04:00
config . createTestPods ( )
epCount := len ( config . EndpointPods )
2020-10-07 09:51:42 -04:00
// Note that this is not O(n^2) in practice, because epCount SHOULD be < 10. In cases that epCount is > 10, this would be prohibitively large.
// Check maxNetProxyPodsCount for details.
2016-10-04 17:06:25 -04:00
config . MaxTries = epCount * epCount + testTries
2020-09-23 10:09:38 -04:00
framework . Logf ( "Setting MaxTries for pod polling to %v for networking test based on endpoint count %v" , config . MaxTries , epCount )
2016-10-04 17:06:25 -04:00
}
// setup includes setupCore and also sets up services
func ( config * NetworkingTestConfig ) setup ( selector map [ string ] string ) {
config . setupCore ( selector )
2016-08-24 21:08:12 -04:00
2019-03-28 20:31:25 -04:00
ginkgo . By ( "Getting node addresses" )
2019-10-24 21:34:25 -04:00
framework . ExpectNoError ( framework . WaitForAllNodesSchedulable ( config . f . ClientSet , 10 * time . Minute ) )
2019-09-03 15:00:00 -04:00
nodeList , err := e2enode . GetReadySchedulableNodes ( config . f . ClientSet )
2019-10-24 21:34:25 -04:00
framework . ExpectNoError ( err )
2017-04-23 22:21:29 -04:00
2020-01-17 03:41:35 -05:00
e2eskipper . SkipUnlessNodeCountIsAtLeast ( 2 )
2016-10-04 17:06:25 -04:00
config . Nodes = nodeList . Items
2016-08-24 21:08:12 -04:00
2019-03-28 20:31:25 -04:00
ginkgo . By ( "Creating the service on top of the pods in kubernetes" )
2016-10-04 17:06:25 -04:00
config . createNodePortService ( selector )
2017-11-05 14:25:55 -05:00
config . createSessionAffinityService ( selector )
2016-08-24 21:08:12 -04:00
2016-10-04 17:06:25 -04:00
for _ , p := range config . NodePortService . Spec . Ports {
2016-08-24 21:08:12 -04:00
switch p . Protocol {
2016-11-18 15:55:17 -05:00
case v1 . ProtocolUDP :
2019-03-28 20:31:25 -04:00
config . NodeUDPPort = int ( p . NodePort )
2016-11-18 15:55:17 -05:00
case v1 . ProtocolTCP :
2019-03-28 20:31:25 -04:00
config . NodeHTTPPort = int ( p . NodePort )
2020-02-07 06:28:50 -05:00
case v1 . ProtocolSCTP :
config . NodeSCTPPort = int ( p . NodePort )
2016-08-24 21:08:12 -04:00
default :
continue
}
}
2020-11-11 17:04:33 -05:00
// obtain the ClusterIP
2016-10-04 17:06:25 -04:00
config . ClusterIP = config . NodePortService . Spec . ClusterIP
2020-11-11 17:04:33 -05:00
if config . DualStackEnabled {
config . SecondaryClusterIP = config . NodePortService . Spec . ClusterIPs [ 1 ]
}
// Obtain the primary IP family of the Cluster based on the first ClusterIP
2020-11-16 17:11:11 -05:00
// TODO: Eventually we should just be getting these from Spec.IPFamilies
// but for now that would only if the feature gate is enabled.
2020-11-11 17:04:33 -05:00
family := v1 . IPv4Protocol
secondaryFamily := v1 . IPv6Protocol
if netutils . IsIPv6String ( config . ClusterIP ) {
family = v1 . IPv6Protocol
secondaryFamily = v1 . IPv4Protocol
}
// Get Node IPs from the cluster, ExternalIPs take precedence
2020-11-16 17:11:11 -05:00
config . NodeIP = e2enode . FirstAddressByTypeAndFamily ( nodeList , v1 . NodeExternalIP , family )
if config . NodeIP == "" {
2020-11-11 17:04:33 -05:00
config . NodeIP = e2enode . FirstAddressByTypeAndFamily ( nodeList , v1 . NodeInternalIP , family )
}
if config . DualStackEnabled {
2020-11-16 17:11:11 -05:00
config . SecondaryNodeIP = e2enode . FirstAddressByTypeAndFamily ( nodeList , v1 . NodeExternalIP , secondaryFamily )
if config . SecondaryNodeIP == "" {
2020-11-11 17:04:33 -05:00
config . SecondaryNodeIP = e2enode . FirstAddressByTypeAndFamily ( nodeList , v1 . NodeInternalIP , secondaryFamily )
}
2017-10-08 10:53:19 -04:00
}
2020-06-28 17:52:37 -04:00
ginkgo . By ( "Waiting for NodePort service to expose endpoint" )
err = framework . WaitForServiceEndpointsNum ( config . f . ClientSet , config . Namespace , nodePortServiceName , len ( config . EndpointPods ) , time . Second , wait . ForeverTestTimeout )
framework . ExpectNoError ( err , "failed to validate endpoints for service %s in namespace: %s" , nodePortServiceName , config . Namespace )
ginkgo . By ( "Waiting for Session Affinity service to expose endpoint" )
err = framework . WaitForServiceEndpointsNum ( config . f . ClientSet , config . Namespace , sessionAffinityServiceName , len ( config . EndpointPods ) , time . Second , wait . ForeverTestTimeout )
framework . ExpectNoError ( err , "failed to validate endpoints for service %s in namespace: %s" , sessionAffinityServiceName , config . Namespace )
2016-08-24 21:08:12 -04:00
}
2016-11-18 15:55:17 -05:00
func ( config * NetworkingTestConfig ) createNetProxyPods ( podName string , selector map [ string ] string ) [ ] * v1 . Pod {
2019-10-24 21:34:25 -04:00
framework . ExpectNoError ( framework . WaitForAllNodesSchedulable ( config . f . ClientSet , 10 * time . Minute ) )
2019-09-08 13:19:17 -04:00
nodeList , err := e2enode . GetBoundedReadySchedulableNodes ( config . f . ClientSet , maxNetProxyPodsCount )
2019-10-24 21:34:25 -04:00
framework . ExpectNoError ( err )
2019-09-08 13:19:17 -04:00
nodes := nodeList . Items
2016-08-24 21:08:12 -04:00
// create pods, one for each node
2016-11-18 15:55:17 -05:00
createdPods := make ( [ ] * v1 . Pod , 0 , len ( nodes ) )
2016-09-14 06:27:20 -04:00
for i , n := range nodes {
2016-08-24 21:08:12 -04:00
podName := fmt . Sprintf ( "%s-%d" , podName , i )
2017-06-07 16:51:24 -04:00
hostname , _ := n . Labels [ "kubernetes.io/hostname" ]
pod := config . createNetShellPodSpec ( podName , hostname )
2016-08-24 21:08:12 -04:00
pod . ObjectMeta . Labels = selector
2020-10-26 07:01:06 -04:00
pod . Spec . HostNetwork = config . EndpointsHostNetwork
2016-08-24 21:08:12 -04:00
createdPod := config . createPod ( pod )
createdPods = append ( createdPods , createdPod )
}
// wait that all of them are up
2016-11-18 15:55:17 -05:00
runningPods := make ( [ ] * v1 . Pod , 0 , len ( nodes ) )
2016-08-24 21:08:12 -04:00
for _ , p := range createdPods {
2020-03-22 11:08:52 -04:00
framework . ExpectNoError ( e2epod . WaitTimeoutForPodReadyInNamespace ( config . f . ClientSet , p . Name , config . f . Namespace . Name , framework . PodStartTimeout ) )
2020-02-07 21:16:47 -05:00
rp , err := config . getPodClient ( ) . Get ( context . TODO ( ) , p . Name , metav1 . GetOptions { } )
2019-10-24 21:34:25 -04:00
framework . ExpectNoError ( err )
2016-08-24 21:08:12 -04:00
runningPods = append ( runningPods , rp )
}
return runningPods
}
2019-03-28 20:31:25 -04:00
// DeleteNetProxyPod deletes the first endpoint pod and waits for it being removed.
2016-10-04 17:06:25 -04:00
func ( config * NetworkingTestConfig ) DeleteNetProxyPod ( ) {
pod := config . EndpointPods [ 0 ]
2020-03-01 13:19:56 -05:00
config . getPodClient ( ) . Delete ( context . TODO ( ) , pod . Name , * metav1 . NewDeleteOptions ( 0 ) )
2016-10-04 17:06:25 -04:00
config . EndpointPods = config . EndpointPods [ 1 : ]
2016-08-24 21:08:12 -04:00
// wait for pod being deleted.
2019-05-07 20:09:50 -04:00
err := e2epod . WaitForPodToDisappear ( config . f . ClientSet , config . Namespace , pod . Name , labels . Everything ( ) , time . Second , wait . ForeverTestTimeout )
2016-08-24 21:08:12 -04:00
if err != nil {
2019-10-24 21:34:25 -04:00
framework . Failf ( "Failed to delete %s pod: %v" , pod . Name , err )
2016-08-24 21:08:12 -04:00
}
// wait for endpoint being removed.
2019-10-24 21:34:25 -04:00
err = framework . WaitForServiceEndpointsNum ( config . f . ClientSet , config . Namespace , nodePortServiceName , len ( config . EndpointPods ) , time . Second , wait . ForeverTestTimeout )
2016-08-24 21:08:12 -04:00
if err != nil {
2019-10-24 21:34:25 -04:00
framework . Failf ( "Failed to remove endpoint from service: %s" , nodePortServiceName )
2016-08-24 21:08:12 -04:00
}
// wait for kube-proxy to catch up with the pod being deleted.
time . Sleep ( 5 * time . Second )
}
2016-11-18 15:55:17 -05:00
func ( config * NetworkingTestConfig ) createPod ( pod * v1 . Pod ) * v1 . Pod {
2016-10-04 17:06:25 -04:00
return config . getPodClient ( ) . Create ( pod )
2016-08-24 21:08:12 -04:00
}
2019-10-24 21:34:25 -04:00
func ( config * NetworkingTestConfig ) getPodClient ( ) * framework . PodClient {
2016-10-04 17:06:25 -04:00
if config . podClient == nil {
config . podClient = config . f . PodClient ( )
}
return config . podClient
2016-08-24 21:08:12 -04:00
}
2016-10-18 09:00:38 -04:00
func ( config * NetworkingTestConfig ) getServiceClient ( ) coreclientset . ServiceInterface {
2017-10-25 11:54:32 -04:00
return config . f . ClientSet . CoreV1 ( ) . Services ( config . Namespace )
2016-08-24 21:08:12 -04:00
}
2019-03-28 20:31:25 -04:00
// HTTPPokeParams holds the optional parameters of PokeHTTP.
type HTTPPokeParams struct {
	// Timeout is the HTTP client timeout; PokeHTTP uses 10 seconds when zero.
	Timeout time.Duration
	// ExpectCode is the status code counted as success; PokeHTTP uses 200
	// when zero.
	ExpectCode int
	// BodyContains, when non-empty, must be a substring of the response body.
	BodyContains string
	// RetriableCodes lists unexpected status codes that PokeHTTP reports as
	// HTTPRetryCode instead of HTTPWrongCode.
	RetriableCodes []int
	// EnableHTTPS makes PokeHTTP use the https scheme instead of http.
	EnableHTTPS bool
}
2019-03-28 20:31:25 -04:00
// HTTPPokeResult is a struct for HTTP poke result.
2019-03-07 20:08:44 -05:00
type HTTPPokeResult struct {
Status HTTPPokeStatus
Code int // HTTP code: 0 if the connection was not made
Error error // if there was any error
Body [ ] byte // if code != 0
2016-12-29 18:35:47 -05:00
}
2019-03-28 20:31:25 -04:00
// HTTPPokeStatus is the string type used to classify the outcome of an HTTP
// poke.
type HTTPPokeStatus string

// Known HTTP poke outcomes.
// Any time we add new errors, we should audit all callers of this.
const (
	// HTTPSuccess means the poke succeeded.
	HTTPSuccess = HTTPPokeStatus("Success")
	// HTTPError means the poke failed with an error that has no more
	// specific status.
	HTTPError = HTTPPokeStatus("UnknownError")
	// HTTPTimeout means the poke timed out.
	HTTPTimeout = HTTPPokeStatus("TimedOut")
	// HTTPRefused means the connection was refused.
	HTTPRefused = HTTPPokeStatus("ConnectionRefused")
	// HTTPRetryCode means the response carried a retriable status code.
	HTTPRetryCode = HTTPPokeStatus("RetryCode")
	// HTTPWrongCode means the response carried an unexpected status code.
	HTTPWrongCode = HTTPPokeStatus("WrongCode")
	// HTTPBadResponse means the response content was not what was expected.
	HTTPBadResponse = HTTPPokeStatus("BadResponse")
)
2017-09-06 14:36:27 -04:00
2019-03-07 20:08:44 -05:00
// PokeHTTP tries to connect to a host on a port for a given URL path. Callers
// can specify additional success parameters, if desired.
//
// The result status will be characterized as precisely as possible, given the
// known users of this.
//
// The result code will be zero in case of any failure to connect, or non-zero
// if the HTTP transaction completed (even if the other test params make this a
// failure).
//
// The result error will be populated for any status other than Success.
//
// The result body will be populated if the HTTP transaction was completed, even
// if the other test params make this a failure).
func PokeHTTP ( host string , port int , path string , params * HTTPPokeParams ) HTTPPokeResult {
2020-10-30 14:09:50 -04:00
// Set default params.
if params == nil {
params = & HTTPPokeParams { }
}
2019-03-07 20:08:44 -05:00
hostPort := net . JoinHostPort ( host , strconv . Itoa ( port ) )
2020-10-30 14:09:50 -04:00
var url string
if params . EnableHTTPS {
url = fmt . Sprintf ( "https://%s%s" , hostPort , path )
} else {
url = fmt . Sprintf ( "http://%s%s" , hostPort , path )
}
2019-03-07 20:08:44 -05:00
ret := HTTPPokeResult { }
// Sanity check inputs, because it has happened. These are the only things
// that should hard fail the test - they are basically ASSERT()s.
if host == "" {
2019-10-24 21:34:25 -04:00
framework . Failf ( "Got empty host for HTTP poke (%s)" , url )
2019-03-07 20:08:44 -05:00
return ret
2016-12-29 18:35:47 -05:00
}
if port == 0 {
2019-10-24 21:34:25 -04:00
framework . Failf ( "Got port==0 for HTTP poke (%s)" , url )
2019-03-07 20:08:44 -05:00
return ret
}
if params . ExpectCode == 0 {
params . ExpectCode = http . StatusOK
2016-12-29 18:35:47 -05:00
}
2021-06-02 07:31:12 -04:00
if params . Timeout == 0 {
params . Timeout = 10 * time . Second
}
2019-10-24 21:34:25 -04:00
framework . Logf ( "Poking %q" , url )
2016-12-29 18:35:47 -05:00
2019-03-07 20:08:44 -05:00
resp , err := httpGetNoConnectionPoolTimeout ( url , params . Timeout )
2016-12-29 18:35:47 -05:00
if err != nil {
2019-03-07 20:08:44 -05:00
ret . Error = err
neterr , ok := err . ( net . Error )
if ok && neterr . Timeout ( ) {
ret . Status = HTTPTimeout
} else if strings . Contains ( err . Error ( ) , "connection refused" ) {
ret . Status = HTTPRefused
} else {
ret . Status = HTTPError
}
2019-10-24 21:34:25 -04:00
framework . Logf ( "Poke(%q): %v" , url , err )
2019-03-07 20:08:44 -05:00
return ret
2016-12-29 18:35:47 -05:00
}
2019-03-07 20:08:44 -05:00
ret . Code = resp . StatusCode
2016-12-29 18:35:47 -05:00
defer resp . Body . Close ( )
body , err := ioutil . ReadAll ( resp . Body )
if err != nil {
2019-03-07 20:08:44 -05:00
ret . Status = HTTPError
ret . Error = fmt . Errorf ( "error reading HTTP body: %v" , err )
2019-10-24 21:34:25 -04:00
framework . Logf ( "Poke(%q): %v" , url , ret . Error )
2019-03-07 20:08:44 -05:00
return ret
2016-12-29 18:35:47 -05:00
}
2019-03-07 20:08:44 -05:00
ret . Body = make ( [ ] byte , len ( body ) )
copy ( ret . Body , body )
if resp . StatusCode != params . ExpectCode {
for _ , code := range params . RetriableCodes {
2017-09-06 14:36:27 -04:00
if resp . StatusCode == code {
2019-03-07 20:08:44 -05:00
ret . Error = fmt . Errorf ( "retriable status code: %d" , resp . StatusCode )
ret . Status = HTTPRetryCode
2019-10-24 21:34:25 -04:00
framework . Logf ( "Poke(%q): %v" , url , ret . Error )
2019-03-07 20:08:44 -05:00
return ret
2017-09-06 14:36:27 -04:00
}
}
2019-03-07 20:08:44 -05:00
ret . Status = HTTPWrongCode
ret . Error = fmt . Errorf ( "bad status code: %d" , resp . StatusCode )
2019-10-24 21:34:25 -04:00
framework . Logf ( "Poke(%q): %v" , url , ret . Error )
2019-03-07 20:08:44 -05:00
return ret
2016-12-29 18:35:47 -05:00
}
2019-03-07 20:08:44 -05:00
if params . BodyContains != "" && ! strings . Contains ( string ( body ) , params . BodyContains ) {
ret . Status = HTTPBadResponse
ret . Error = fmt . Errorf ( "response does not contain expected substring: %q" , string ( body ) )
2019-10-24 21:34:25 -04:00
framework . Logf ( "Poke(%q): %v" , url , ret . Error )
2019-03-07 20:08:44 -05:00
return ret
2016-12-29 18:35:47 -05:00
}
2019-03-07 20:08:44 -05:00
ret . Status = HTTPSuccess
2019-10-24 21:34:25 -04:00
framework . Logf ( "Poke(%q): success" , url )
2019-03-07 20:08:44 -05:00
return ret
2016-12-29 18:35:47 -05:00
}
2019-03-07 20:08:44 -05:00
// Does an HTTP GET, but does not reuse TCP connections
// This masks problems where the iptables rule has changed, but we don't see it
func httpGetNoConnectionPoolTimeout ( url string , timeout time . Duration ) ( * http . Response , error ) {
tr := utilnet . SetTransportDefaults ( & http . Transport {
DisableKeepAlives : true ,
2020-10-30 14:09:50 -04:00
TLSClientConfig : & tls . Config { InsecureSkipVerify : true } ,
2019-03-07 20:08:44 -05:00
} )
client := & http . Client {
Transport : tr ,
Timeout : timeout ,
2016-12-29 18:35:47 -05:00
}
2019-03-07 20:08:44 -05:00
return client . Get ( url )
}
2016-12-29 18:35:47 -05:00
2019-03-28 20:31:25 -04:00
// TestUnderTemporaryNetworkFailure blocks outgoing network traffic on 'node'. Then runs testFunc and returns its status.
2017-03-23 05:53:57 -04:00
// At the end (even in case of errors), the network traffic is brought back to normal.
// This function executes commands on a node so it will work only for some
// environments.
func TestUnderTemporaryNetworkFailure ( c clientset . Interface , ns string , node * v1 . Node , testFunc func ( ) ) {
2019-05-24 02:47:40 -04:00
host , err := e2enode . GetExternalIP ( node )
2018-06-27 11:08:29 -04:00
if err != nil {
2019-10-24 21:34:25 -04:00
framework . Failf ( "Error getting node external ip : %v" , err )
2018-06-27 11:08:29 -04:00
}
2020-09-30 00:43:10 -04:00
controlPlaneAddresses := framework . GetControlPlaneAddresses ( c )
ginkgo . By ( fmt . Sprintf ( "block network traffic from node %s to the control plane" , node . Name ) )
2017-03-23 05:53:57 -04:00
defer func ( ) {
// This code will execute even if setting the iptables rule failed.
// It is on purpose because we may have an error even if the new rule
// had been inserted. (yes, we could look at the error code and ssh error
// separately, but I prefer to stay on the safe side).
2020-09-30 00:43:10 -04:00
ginkgo . By ( fmt . Sprintf ( "Unblock network traffic from node %s to the control plane" , node . Name ) )
for _ , instanceAddress := range controlPlaneAddresses {
UnblockNetwork ( host , instanceAddress )
2018-11-05 22:19:05 -05:00
}
2017-03-23 05:53:57 -04:00
} ( )
2019-10-24 21:34:25 -04:00
framework . Logf ( "Waiting %v to ensure node %s is ready before beginning test..." , resizeNodeReadyTimeout , node . Name )
2019-05-24 02:47:40 -04:00
if ! e2enode . WaitConditionToBe ( c , node . Name , v1 . NodeReady , true , resizeNodeReadyTimeout ) {
2019-10-24 21:34:25 -04:00
framework . Failf ( "Node %s did not become ready within %v" , node . Name , resizeNodeReadyTimeout )
2017-03-23 05:53:57 -04:00
}
2020-09-30 00:43:10 -04:00
for _ , instanceAddress := range controlPlaneAddresses {
BlockNetwork ( host , instanceAddress )
2018-11-05 22:19:05 -05:00
}
2017-03-23 05:53:57 -04:00
2019-10-24 21:34:25 -04:00
framework . Logf ( "Waiting %v for node %s to be not ready after simulated network failure" , resizeNodeNotReadyTimeout , node . Name )
2019-05-24 02:47:40 -04:00
if ! e2enode . WaitConditionToBe ( c , node . Name , v1 . NodeReady , false , resizeNodeNotReadyTimeout ) {
2019-10-24 21:34:25 -04:00
framework . Failf ( "Node %s did not become not-ready within %v" , node . Name , resizeNodeNotReadyTimeout )
2017-03-23 05:53:57 -04:00
}
testFunc ( )
// network traffic is unblocked in a deferred function
}
2020-03-24 02:00:50 -04:00
// BlockNetwork blocks network between the given from value and the given to value.
// It does so by inserting an iptables REJECT rule for destination `to` into
// the OUTPUT chain on host `from`, and fails the test if the command cannot be
// run or exits with a non-zero code.
// This function assumes it can ssh to the source host.
//
// Caution:
// Recommend to input IP instead of hostnames. Using hostnames will cause iptables to
// do a DNS lookup to resolve the name to an IP address, which will
// slow down the test and cause it to fail if DNS is absent or broken.
//
// Suggested usage pattern:
//
//	func foo() {
//		...
//		defer UnblockNetwork(from, to)
//		BlockNetwork(from, to)
//		...
//	}
func BlockNetwork(from string, to string) {
	framework.Logf("block network traffic from %s to %s", from, to)
	iptablesRule := fmt.Sprintf("OUTPUT --destination %s --jump REJECT", to)
	dropCmd := fmt.Sprintf("sudo iptables --insert %s", iptablesRule)

	result, err := e2essh.SSH(dropCmd, from, framework.TestContext.Provider)
	if err != nil || result.Code != 0 {
		e2essh.LogResult(result)
		// err can be nil here when the command ran but exited non-zero, so
		// report the exit code as well to keep the failure diagnosable.
		framework.Failf("Failed to block network traffic from %s to %s: command %q exited with code %v, err: %v", from, to, dropCmd, result.Code, err)
	}
}
// UnblockNetwork unblocks network between the given from value and the given to value.
// It deletes the iptables REJECT rule for destination `to` from the OUTPUT
// chain on host `from`, retrying every 100ms for up to 30 seconds, and fails
// the test if the rule could not be removed in that time.
func UnblockNetwork(from string, to string) {
	framework.Logf("Unblock network traffic from %s to %s", from, to)
	iptablesRule := fmt.Sprintf("OUTPUT --destination %s --jump REJECT", to)
	undropCmd := fmt.Sprintf("sudo iptables --delete %s", iptablesRule)

	// removeRule makes one attempt at deleting the rule; failures are logged
	// and retried rather than aborting the poll.
	removeRule := func() (bool, error) {
		result, sshErr := e2essh.SSH(undropCmd, from, framework.TestContext.Provider)
		if sshErr == nil && result.Code == 0 {
			return true, nil
		}
		e2essh.LogResult(result)
		if sshErr != nil {
			framework.Logf("Unexpected error: %v", sshErr)
		}
		return false, nil
	}

	// Undrop command may fail if the rule has never been created.
	// In such case we just lose 30 seconds, but the cluster is healthy.
	// But if the rule had been created and removing it failed, the node is broken and
	// not coming back. Subsequent tests will run on fewer nodes (some of the tests
	// may fail). Manual intervention is required in such case (recreating the
	// cluster solves the problem too).
	if pollErr := wait.Poll(time.Millisecond*100, time.Second*30, removeRule); pollErr != nil {
		framework.Failf("Failed to remove the iptable REJECT rule. Manual intervention is "+
			"required on host %s: remove rule %s, if exists", from, iptablesRule)
	}
}
2020-04-07 04:11:30 -04:00
// WaitForService waits until the service appears (exist == true), or disappears (exist == false)
func WaitForService ( c clientset . Interface , namespace , name string , exist bool , interval , timeout time . Duration ) error {
err := wait . PollImmediate ( interval , timeout , func ( ) ( bool , error ) {
_ , err := c . CoreV1 ( ) . Services ( namespace ) . Get ( context . TODO ( ) , name , metav1 . GetOptions { } )
switch {
case err == nil :
framework . Logf ( "Service %s in namespace %s found." , name , namespace )
return exist , nil
case apierrors . IsNotFound ( err ) :
framework . Logf ( "Service %s in namespace %s disappeared." , name , namespace )
return ! exist , nil
2020-10-12 14:04:47 -04:00
case err != nil :
2020-04-07 04:11:30 -04:00
framework . Logf ( "Non-retryable failure while getting service." )
return false , err
default :
framework . Logf ( "Get service %s in namespace %s failed: %v" , name , namespace , err )
return false , nil
}
} )
if err != nil {
stateMsg := map [ bool ] string { true : "to appear" , false : "to disappear" }
return fmt . Errorf ( "error waiting for service %s/%s %s: %v" , namespace , name , stateMsg [ exist ] , err )
}
return nil
}