2021-02-01 02:32:41 -05:00
// +build linux

/*
Copyright 2021 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2enode
import (
"context"
"fmt"
"strconv"
"time"
"k8s.io/apimachinery/pkg/fields"
"github.com/onsi/ginkgo"
"github.com/onsi/gomega"
"k8s.io/kubernetes/pkg/apis/scheduling"
"k8s.io/kubernetes/pkg/features"
2021-03-04 02:31:57 -05:00
"k8s.io/kubernetes/test/e2e/framework"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2021-02-01 02:32:41 -05:00
kubeletconfig "k8s.io/kubernetes/pkg/kubelet/apis/config"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
2021-03-04 02:31:57 -05:00
testutils "k8s.io/kubernetes/test/utils"
2021-02-01 02:32:41 -05:00
)
2021-02-22 13:53:33 -05:00
var _ = SIGDescribe ( "GracefulNodeShutdown [Serial] [NodeAlphaFeature:GracefulNodeShutdown]" , func ( ) {
2021-02-01 02:32:41 -05:00
f := framework . NewDefaultFramework ( "graceful-node-shutdown" )
ginkgo . Context ( "when gracefully shutting down" , func ( ) {
const (
pollInterval = 1 * time . Second
podStatusUpdateTimeout = 5 * time . Second
nodeStatusUpdateTimeout = 10 * time . Second
nodeShutdownGracePeriod = 20 * time . Second
nodeShutdownGracePeriodCriticalPods = 10 * time . Second
)
tempSetCurrentKubeletConfig ( f , func ( initialConfig * kubeletconfig . KubeletConfiguration ) {
initialConfig . FeatureGates = map [ string ] bool {
string ( features . GracefulNodeShutdown ) : true ,
}
initialConfig . ShutdownGracePeriod = metav1 . Duration { Duration : nodeShutdownGracePeriod }
initialConfig . ShutdownGracePeriodCriticalPods = metav1 . Duration { Duration : nodeShutdownGracePeriodCriticalPods }
} )
2021-04-28 04:10:11 -04:00
ginkgo . BeforeEach ( func ( ) {
ginkgo . By ( "Wait for the node to be ready" )
waitForNodeReady ( )
} )
2021-02-01 02:32:41 -05:00
ginkgo . AfterEach ( func ( ) {
ginkgo . By ( "Emitting Shutdown false signal; cancelling the shutdown" )
err := emitSignalPrepareForShutdown ( false )
framework . ExpectNoError ( err )
} )
ginkgo . It ( "should be able to gracefully shutdown pods with various grace periods" , func ( ) {
nodeName := getNodeName ( f )
nodeSelector := fields . Set {
"spec.nodeName" : nodeName ,
} . AsSelector ( ) . String ( )
// Define test pods
pods := [ ] * v1 . Pod {
getGracePeriodOverrideTestPod ( "period-120" , nodeName , 120 , false ) ,
getGracePeriodOverrideTestPod ( "period-5" , nodeName , 5 , false ) ,
getGracePeriodOverrideTestPod ( "period-critical-120" , nodeName , 120 , true ) ,
getGracePeriodOverrideTestPod ( "period-critical-5" , nodeName , 5 , true ) ,
}
ginkgo . By ( "Creating batch pods" )
f . PodClient ( ) . CreateBatch ( pods )
list , err := f . PodClient ( ) . List ( context . TODO ( ) , metav1 . ListOptions {
FieldSelector : nodeSelector ,
} )
framework . ExpectNoError ( err )
framework . ExpectEqual ( len ( list . Items ) , len ( pods ) , "the number of pods is not as expected" )
2021-03-04 02:31:57 -05:00
ginkgo . By ( "Verifying batch pods are running" )
2021-02-01 02:32:41 -05:00
for _ , pod := range list . Items {
2021-03-04 02:31:57 -05:00
if podReady , err := testutils . PodRunningReady ( & pod ) ; err != nil || ! podReady {
framework . Failf ( "Failed to start batch pod: %v" , pod . Name )
}
2021-02-01 02:32:41 -05:00
}
ginkgo . By ( "Emitting shutdown signal" )
err = emitSignalPrepareForShutdown ( true )
framework . ExpectNoError ( err )
2021-03-04 02:31:57 -05:00
ginkgo . By ( "Verifying that non-critical pods are shutdown" )
2021-02-01 02:32:41 -05:00
// Not critical pod should be shutdown
gomega . Eventually ( func ( ) error {
list , err = f . PodClient ( ) . List ( context . TODO ( ) , metav1 . ListOptions {
FieldSelector : nodeSelector ,
} )
if err != nil {
return err
}
framework . ExpectEqual ( len ( list . Items ) , len ( pods ) , "the number of pods is not as expected" )
for _ , pod := range list . Items {
if kubelettypes . IsCriticalPod ( & pod ) {
if pod . Status . Phase != v1 . PodRunning {
2021-03-04 02:31:57 -05:00
framework . Logf ( "Expecting critcal pod to be running, but it's not currently. Pod: %q, Pod Status Phase: %q, Pod Status Reason: %q" , pod . Name , pod . Status . Phase , pod . Status . Reason )
2021-02-01 02:32:41 -05:00
return fmt . Errorf ( "critical pod should not be shutdown, phase: %s" , pod . Status . Phase )
}
} else {
if pod . Status . Phase != v1 . PodFailed || pod . Status . Reason != "Shutdown" {
2021-03-04 02:31:57 -05:00
framework . Logf ( "Expecting non-critcal pod to be shutdown, but it's not currently. Pod: %q, Pod Status Phase: %q, Pod Status Reason: %q" , pod . Name , pod . Status . Phase , pod . Status . Reason )
2021-02-01 02:32:41 -05:00
return fmt . Errorf ( "pod should be shutdown, phase: %s" , pod . Status . Phase )
}
}
}
return nil
} , podStatusUpdateTimeout , pollInterval ) . Should ( gomega . BeNil ( ) )
2021-03-04 02:31:57 -05:00
ginkgo . By ( "Verifying that all pods are shutdown" )
2021-02-01 02:32:41 -05:00
// All pod should be shutdown
gomega . Eventually ( func ( ) error {
list , err = f . PodClient ( ) . List ( context . TODO ( ) , metav1 . ListOptions {
FieldSelector : nodeSelector ,
} )
if err != nil {
return err
}
framework . ExpectEqual ( len ( list . Items ) , len ( pods ) , "the number of pods is not as expected" )
for _ , pod := range list . Items {
if pod . Status . Phase != v1 . PodFailed || pod . Status . Reason != "Shutdown" {
2021-03-04 02:31:57 -05:00
framework . Logf ( "Expecting pod to be shutdown, but it's not currently: Pod: %q, Pod Status Phase: %q, Pod Status Reason: %q" , pod . Name , pod . Status . Phase , pod . Status . Reason )
2021-02-01 02:32:41 -05:00
return fmt . Errorf ( "pod should be shutdown, phase: %s" , pod . Status . Phase )
}
}
return nil
} ,
// Critical pod starts shutdown after (nodeShutdownGracePeriod-nodeShutdownGracePeriodCriticalPods)
podStatusUpdateTimeout + ( nodeShutdownGracePeriod - nodeShutdownGracePeriodCriticalPods ) ,
pollInterval ) . Should ( gomega . BeNil ( ) )
} )
ginkgo . It ( "should be able to handle a cancelled shutdown" , func ( ) {
ginkgo . By ( "Emitting Shutdown signal" )
err := emitSignalPrepareForShutdown ( true )
framework . ExpectNoError ( err )
gomega . Eventually ( func ( ) error {
isReady := getNodeReadyStatus ( f )
if isReady {
return fmt . Errorf ( "node did not become shutdown as expected" )
}
return nil
} , nodeStatusUpdateTimeout , pollInterval ) . Should ( gomega . BeNil ( ) )
ginkgo . By ( "Emitting Shutdown false signal; cancelling the shutdown" )
err = emitSignalPrepareForShutdown ( false )
framework . ExpectNoError ( err )
gomega . Eventually ( func ( ) error {
isReady := getNodeReadyStatus ( f )
if ! isReady {
return fmt . Errorf ( "node did not recover as expected" )
}
return nil
} , nodeStatusUpdateTimeout , pollInterval ) . Should ( gomega . BeNil ( ) )
} )
} )
} )
// getGracePeriodOverrideTestPod builds a single-container busybox pod named
// name that is pinned to node and uses gracePeriod as its
// TerminationGracePeriodSeconds. The container runs a shell script that traps
// SIGTERM and keeps sleeping in the handler.
//
// When critical is true the pod additionally gets the file config-source
// annotation and the system-node-critical priority class. In both cases the
// function asserts that kubelettypes.IsCriticalPod agrees with critical.
func getGracePeriodOverrideTestPod(name string, node string, gracePeriod int64, critical bool) *v1.Pod {
	// Script run by the container: on SIGTERM it logs and then loops in the
	// handler instead of exiting.
	const sigtermScript = `
_term ( ) {
echo "Caught SIGTERM signal!"
while true ; do sleep 5 ; done
}
trap _term SIGTERM
while true ; do sleep 5 ; done
`

	container := v1.Container{
		Name:    name,
		Image:   busyboxImage,
		Command: []string{"sh", "-c"},
		Args:    []string{sigtermScript},
	}

	pod := &v1.Pod{
		TypeMeta: metav1.TypeMeta{
			Kind:       "Pod",
			APIVersion: "v1",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name: name,
		},
		Spec: v1.PodSpec{
			Containers:                    []v1.Container{container},
			TerminationGracePeriodSeconds: &gracePeriod,
			NodeName:                      node,
		},
	}

	if !critical {
		framework.ExpectEqual(kubelettypes.IsCriticalPod(pod), false, "pod should not be a critical pod")
		return pod
	}

	pod.ObjectMeta.Annotations = map[string]string{
		kubelettypes.ConfigSourceAnnotationKey: kubelettypes.FileSource,
	}
	pod.Spec.PriorityClassName = scheduling.SystemNodeCritical
	framework.ExpectEqual(kubelettypes.IsCriticalPod(pod), true, "pod should be a critical pod")
	return pod
}
// Emits a fake PrepareForShutdown dbus message on system dbus. Will cause kubelet to react to an active shutdown event.
func emitSignalPrepareForShutdown ( b bool ) error {
2021-03-04 02:31:57 -05:00
cmd := "dbus-send --system /org/freedesktop/login1 org.freedesktop.login1.Manager.PrepareForShutdown boolean:" + strconv . FormatBool ( b )
2021-02-01 02:32:41 -05:00
_ , err := runCommand ( "sh" , "-c" , cmd )
return err
}
// getNodeReadyStatus lists the nodes through f's client set and returns
// whether isNodeReady holds for the single node found. It fails the test if
// the list call errors or if the number of nodes is not exactly one.
func getNodeReadyStatus(f *framework.Framework) bool {
	nodes, listErr := f.ClientSet.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
	framework.ExpectNoError(listErr)

	// Assuming that there is only one node, because this is a node e2e test.
	framework.ExpectEqual(len(nodes.Items), 1)

	onlyNode := &nodes.Items[0]
	return isNodeReady(onlyNode)
}