2017-12-29 05:43:38 -05:00
/*
Copyright 2017 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
2019-11-06 22:59:05 -05:00
package e2enode
2017-12-29 05:43:38 -05:00
import (
2020-02-07 21:16:47 -05:00
"context"
2023-04-26 12:41:14 -04:00
"errors"
2023-01-31 20:28:45 -05:00
"fmt"
2023-04-26 12:41:14 -04:00
"os"
2017-12-29 05:43:38 -05:00
"path/filepath"
2022-05-16 08:13:39 -04:00
"regexp"
2023-03-09 13:20:46 -05:00
"strings"
2017-12-29 05:43:38 -05:00
"time"
2022-03-29 02:12:12 -04:00
"github.com/onsi/ginkgo/v2"
2022-05-16 08:13:39 -04:00
"github.com/onsi/gomega"
2023-06-15 11:41:42 -04:00
"github.com/onsi/gomega/gcustom"
"github.com/onsi/gomega/types"
2022-05-16 08:13:39 -04:00
2019-12-09 03:09:29 -05:00
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
2023-06-15 11:41:42 -04:00
k8stypes "k8s.io/apimachinery/pkg/types"
2023-03-09 13:20:46 -05:00
"k8s.io/apimachinery/pkg/util/sets"
2023-04-26 12:41:14 -04:00
runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1"
e2e: node: fix plugins directory
Previously, the e2e test was overriding the plugins socket directory to
"/var/lib/kubelet/plugins_registry". This seems wrong, and with that
setting the e2e test was already failing, because the registration
process was timing out, in turn because the kubelet was trying to call
back the device plugin in the wrong place (see below for details).
I can't explain why it worked before - or if it worked at all - but
it really seems that `pluginapi.DevicePluginPath` is the right
setting here.
+++
In a nutshell, the device plugin registration process works like this:
1. The kubelet runs and creates the device plugin socket registration
endpoint:
KubeletSocket = DevicePluginPath + "kubelet.sock"
DevicePluginPath = "/var/lib/kubelet/device-plugins/"
2. Each device plugin will listen to an ENDPOINT the kubelet will connect
back to. IOW the kubelet will act like a client to each device plugin,
to perform allocation requests (and more)
Each device plugin will serve from an endpoint.
The endpoint name is plugin-specific, but they all must be inside a
well-known directory: pluginapi.DevicePluginPath
3. The kubelet creates the device plugin pod, like any other pod
4. During the startup, each device plugin wants to register itself in the
kubelet. So it sends a request through
the registration endpoint. Key details:
grpc.Dial(kubelet registration socket)
registration request
reqt := &pluginapi.RegisterRequest{
Version: pluginapi.Version,
Endpoint: endpointSocket, <- socket relative to pluginapi.DevicePluginPath
ResourceName: resourceName, <- resource name to be exposed
}
5. While handling the registration request, kubelet dial back the
device plugin on socketDir + req.Endpoint.
But socketDir is hardcoded in the device manager code to
pluginapi.KubeletSocket
Signed-off-by: Francesco Romani <fromani@redhat.com>
2022-05-12 13:51:37 -04:00
kubeletdevicepluginv1beta1 "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
2022-04-04 08:00:06 -04:00
admissionapi "k8s.io/pod-security-admission/api"
2019-12-09 03:09:29 -05:00
2017-12-29 05:43:38 -05:00
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2018-01-24 12:06:07 -05:00
"k8s.io/apimachinery/pkg/util/uuid"
2023-06-15 11:41:42 -04:00
"k8s.io/kubectl/pkg/util/podutils"
2020-10-10 19:03:31 -04:00
kubeletpodresourcesv1 "k8s.io/kubelet/pkg/apis/podresources/v1"
2020-06-30 01:13:45 -04:00
kubeletpodresourcesv1alpha1 "k8s.io/kubelet/pkg/apis/podresources/v1alpha1"
2024-11-21 15:38:57 -05:00
"k8s.io/kubernetes/test/e2e/feature"
2017-12-29 05:43:38 -05:00
"k8s.io/kubernetes/test/e2e/framework"
2018-08-28 02:41:42 -04:00
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
2019-05-07 20:09:50 -04:00
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
2023-04-26 12:41:14 -04:00
e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles"
2017-12-29 05:43:38 -05:00
)
2019-12-09 03:09:29 -05:00
var (
	// appsScheme and appsCodecs are used to decode the apps/v1 manifests
	// embedded as test data (see readDaemonSetV1OrDie below).
	appsScheme = runtime.NewScheme()
	appsCodecs = serializer.NewCodecFactory(appsScheme)
)
2018-01-11 01:41:45 -05:00
// Serial because the test restarts Kubelet
var _ = SIGDescribe("Device Plugin", framework.WithSerial(), feature.DevicePlugin, func() {
	f := framework.NewDefaultFramework("device-plugin-errors")
	// The sample device plugin pod needs host-level access, so the
	// test namespace must allow privileged pods.
	f.NamespacePodSecurityLevel = admissionapi.LevelPrivileged
	// Both test suites use the canonical kubelet device plugin socket
	// directory (DevicePluginPath): plugins register through the kubelet
	// socket in that directory, and the kubelet dials each plugin back on
	// an endpoint relative to the same directory.
	testDevicePlugin(f, kubeletdevicepluginv1beta1.DevicePluginPath)
	testDevicePluginNodeReboot(f, kubeletdevicepluginv1beta1.DevicePluginPath)
})
2018-01-11 01:41:45 -05:00
2019-12-09 03:09:29 -05:00
// readDaemonSetV1OrDie reads a daemonset object from bytes. Panics on error.
func readDaemonSetV1OrDie(objBytes []byte) *appsv1.DaemonSet {
	// Register the apps/v1 types with the scheme; decoding below would
	// otherwise fail. The error was previously ignored, which could turn a
	// registration failure into a confusing decode error later.
	if err := appsv1.AddToScheme(appsScheme); err != nil {
		panic(err)
	}
	requiredObj, err := runtime.Decode(appsCodecs.UniversalDecoder(appsv1.SchemeGroupVersion), objBytes)
	if err != nil {
		panic(err)
	}
	return requiredObj.(*appsv1.DaemonSet)
}
2023-03-13 01:18:05 -04:00
const (
	// TODO(vikasc): Instead of hard-coding number of devices, provide number of devices in the sample-device-plugin using configmap
	// and then use the same here
	expectedSampleDevsAmount int64 = 2

	// sleepIntervalForever is the sleep interval specified in the command executed in the pod
	// to ensure the container keeps running "forever" on the test timescale (no restart expected).
	sleepIntervalForever string = "24h"
	// sleepIntervalWithRestart is the sleep interval specified in the command executed in the pod
	// so that the container exits and is restarted within the expected test run time.
	sleepIntervalWithRestart string = "60s"
	// sleepIntervalToCompletion is the sleep interval specified in the command executed in the pod
	// so that the container runs to completion quickly, within the expected test run time.
	sleepIntervalToCompletion string = "5s"
)
2019-03-12 15:58:23 -04:00
func testDevicePlugin ( f * framework . Framework , pluginSockDir string ) {
2025-07-24 23:23:43 -04:00
pluginSockDir = filepath . Clean ( pluginSockDir ) + "/"
2024-07-24 04:49:18 -04:00
type ResourceValue struct {
Allocatable int
Capacity int
}
devicePluginGracefulTimeout := 5 * time . Minute // see endpointStopGracePeriod in pkg/kubelet/cm/devicemanager/types.go
var getNodeResourceValues = func ( ctx context . Context , resourceName string ) ResourceValue {
ginkgo . GinkgoHelper ( )
node := getLocalNode ( ctx , f )
// -1 represents that the resource is not found
result := ResourceValue {
Allocatable : - 1 ,
Capacity : - 1 ,
}
for key , val := range node . Status . Capacity {
resource := string ( key )
if resource == resourceName {
result . Capacity = int ( val . Value ( ) )
break
}
}
for key , val := range node . Status . Allocatable {
resource := string ( key )
if resource == resourceName {
result . Allocatable = int ( val . Value ( ) )
break
}
}
return result
}
2023-06-20 04:27:14 -04:00
f . Context ( "DevicePlugin" , f . WithSerial ( ) , f . WithDisruptive ( ) , func ( ) {
2021-11-10 11:21:39 -05:00
var devicePluginPod , dptemplate * v1 . Pod
2022-10-24 11:57:30 -04:00
var v1alphaPodResources * kubeletpodresourcesv1alpha1 . ListPodResourcesResponse
var v1PodResources * kubeletpodresourcesv1 . ListPodResourcesResponse
var err error
2023-03-09 13:20:46 -05:00
2022-12-12 04:11:10 -05:00
ginkgo . BeforeEach ( func ( ctx context . Context ) {
2021-11-10 11:21:39 -05:00
ginkgo . By ( "Wait for node to be ready" )
2022-12-12 04:11:10 -05:00
gomega . Eventually ( ctx , func ( ctx context . Context ) bool {
nodes , err := e2enode . TotalReady ( ctx , f . ClientSet )
2021-11-10 11:21:39 -05:00
framework . ExpectNoError ( err )
return nodes == 1
2024-07-31 11:58:15 -04:00
} , time . Minute , time . Second ) . Should ( gomega . BeTrueBecause ( "expected node to be ready" ) )
2021-11-10 11:21:39 -05:00
2022-10-24 11:57:30 -04:00
// Before we run the device plugin test, we need to ensure
// that the cluster is in a clean state and there are no
// pods running on this node.
2023-01-31 20:28:45 -05:00
// This is done in a gomega.Eventually with retries since a prior test in a different test suite could've run and the deletion of it's resources may still be in progress.
// xref: https://issue.k8s.io/115381
gomega . Eventually ( ctx , func ( ctx context . Context ) error {
v1alphaPodResources , err = getV1alpha1NodeDevices ( ctx )
if err != nil {
return fmt . Errorf ( "failed to get node local podresources by accessing the (v1alpha) podresources API endpoint: %v" , err )
}
v1PodResources , err = getV1NodeDevices ( ctx )
if err != nil {
return fmt . Errorf ( "failed to get node local podresources by accessing the (v1) podresources API endpoint: %v" , err )
}
if len ( v1alphaPodResources . PodResources ) > 0 {
return fmt . Errorf ( "expected v1alpha pod resources to be empty, but got non-empty resources: %+v" , v1alphaPodResources . PodResources )
}
if len ( v1PodResources . PodResources ) > 0 {
return fmt . Errorf ( "expected v1 pod resources to be empty, but got non-empty resources: %+v" , v1PodResources . PodResources )
}
return nil
} , f . Timeouts . PodDelete , f . Timeouts . Poll ) . Should ( gomega . Succeed ( ) )
2022-10-24 11:57:30 -04:00
2021-11-10 11:21:39 -05:00
ginkgo . By ( "Scheduling a sample device plugin pod" )
2023-02-21 07:50:33 -05:00
dp := getSampleDevicePluginPod ( pluginSockDir )
2022-05-11 11:44:32 -04:00
dptemplate = dp . DeepCopy ( )
2022-12-12 04:11:10 -05:00
devicePluginPod = e2epod . NewPodClient ( f ) . CreateSync ( ctx , dp )
2018-01-11 01:41:45 -05:00
2019-07-28 00:49:36 -04:00
ginkgo . By ( "Waiting for devices to become available on the local node" )
2022-12-12 04:11:10 -05:00
gomega . Eventually ( ctx , func ( ctx context . Context ) bool {
node , ready := getLocalTestNode ( ctx , f )
2023-02-21 07:50:33 -05:00
return ready && CountSampleDeviceCapacity ( node ) > 0
2024-07-31 11:58:15 -04:00
} , 5 * time . Minute , framework . Poll ) . Should ( gomega . BeTrueBecause ( "expected devices to be available on local node" ) )
2019-08-27 05:18:43 -04:00
framework . Logf ( "Successfully created device plugin pod" )
2018-01-11 01:41:45 -05:00
2023-03-13 01:18:05 -04:00
ginkgo . By ( fmt . Sprintf ( "Waiting for the resource exported by the sample device plugin to become available on the local node (instances: %d)" , expectedSampleDevsAmount ) )
2022-12-12 04:11:10 -05:00
gomega . Eventually ( ctx , func ( ctx context . Context ) bool {
node , ready := getLocalTestNode ( ctx , f )
e2e: node: devplugin: tolerate node readiness flip
In the AfterEach check of the e2e node device plugin tests,
the tests want really bad to clean up after themselves:
- delete the sample device plugin
- restart again the kubelet
- ensure that after the restart, no stale sample devices
(provided by the sample device plugin) are reported anymore.
We observed that in the AfterEach block of these e2e tests
we have quite reliably a flip/flop of the kubelet readiness
state, possibly related to a race with/ a slow runtime/PLEG check.
What happens is that the kubelet readiness state is true,
but goes false for a quick interval and then goes true again
and it's pretty stable after that (observed adding more logs
to the check loop).
The key factor here is the function `getLocalNode` aborts the
test (as in `framework.ExpectNoError`) if the node state is
not ready. So any occurrence of this scenario, even if it
is transient, will cause a test failure. I believe this will
make the e2e test unnecessarily fragile without making it more
correct.
For the purpose of the test we can tolerate this kind of glitches,
with kubelet flip/flopping the ready state, granted that we meet
eventually the final desired condition on which the node reports
ready AND reports no sample devices present - which was the condition
the code was trying to check.
So, we add a variant of `getLocalNode`, which just fetches the
node object the e2e_node framework created, alongside to a flag
reporting the node readiness. The new helper does not make
implicitly the test abort if the node is not ready, just bubbles
up this information.
Signed-off-by: Francesco Romani <fromani@redhat.com>
2022-05-15 03:44:30 -04:00
return ready &&
2023-03-13 01:18:05 -04:00
CountSampleDeviceCapacity ( node ) == expectedSampleDevsAmount &&
CountSampleDeviceAllocatable ( node ) == expectedSampleDevsAmount
2024-07-31 11:58:15 -04:00
} , 30 * time . Second , framework . Poll ) . Should ( gomega . BeTrueBecause ( "expected resource to be available on local node" ) )
2021-11-10 11:21:39 -05:00
} )
2022-12-12 04:11:10 -05:00
ginkgo . AfterEach ( func ( ctx context . Context ) {
2021-11-10 11:21:39 -05:00
ginkgo . By ( "Deleting the device plugin pod" )
2025-01-20 11:26:04 -05:00
e2epod . NewPodClient ( f ) . DeleteSync ( ctx , devicePluginPod . Name , metav1 . DeleteOptions { } , f . Timeouts . PodDelete )
2021-11-10 11:21:39 -05:00
ginkgo . By ( "Deleting any Pods created by the test" )
2022-12-12 04:11:10 -05:00
l , err := e2epod . NewPodClient ( f ) . List ( ctx , metav1 . ListOptions { } )
2021-11-10 11:21:39 -05:00
framework . ExpectNoError ( err )
for _ , p := range l . Items {
if p . Namespace != f . Namespace . Name {
continue
}
framework . Logf ( "Deleting pod: %s" , p . Name )
2025-01-20 11:26:04 -05:00
e2epod . NewPodClient ( f ) . DeleteSync ( ctx , p . Name , metav1 . DeleteOptions { } , f . Timeouts . PodDelete )
2021-11-10 11:21:39 -05:00
}
2024-11-05 09:09:04 -05:00
restartKubelet ( ctx , true )
2021-11-10 11:21:39 -05:00
ginkgo . By ( "Waiting for devices to become unavailable on the local node" )
2022-12-12 04:11:10 -05:00
gomega . Eventually ( ctx , func ( ctx context . Context ) bool {
node , ready := getLocalTestNode ( ctx , f )
2023-02-21 07:50:33 -05:00
return ready && CountSampleDeviceCapacity ( node ) <= 0
2024-07-31 11:58:15 -04:00
} , 5 * time . Minute , framework . Poll ) . Should ( gomega . BeTrueBecause ( "expected devices to be unavailable on local node" ) )
e2e: node: devplugin: tolerate node readiness flip
In the AfterEach check of the e2e node device plugin tests,
the tests want really bad to clean up after themselves:
- delete the sample device plugin
- restart again the kubelet
- ensure that after the restart, no stale sample devices
(provided by the sample device plugin) are reported anymore.
We observed that in the AfterEach block of these e2e tests
we have quite reliably a flip/flop of the kubelet readiness
state, possibly related to a race with/ a slow runtime/PLEG check.
What happens is that the kubelet readiness state is true,
but goes false for a quick interval and then goes true again
and it's pretty stable after that (observed adding more logs
to the check loop).
The key factor here is the function `getLocalNode` aborts the
test (as in `framework.ExpectNoError`) if the node state is
not ready. So any occurrence of this scenario, even if it
is transient, will cause a test failure. I believe this will
make the e2e test unnecessarily fragile without making it more
correct.
For the purpose of the test we can tolerate this kind of glitches,
with kubelet flip/flopping the ready state, granted that we meet
eventually the final desired condition on which the node reports
ready AND reports no sample devices present - which was the condition
the code was trying to check.
So, we add a variant of `getLocalNode`, which just fetches the
node object the e2e_node framework created, alongside to a flag
reporting the node readiness. The new helper does not make
implicitly the test abort if the node is not ready, just bubbles
up this information.
Signed-off-by: Francesco Romani <fromani@redhat.com>
2022-05-15 03:44:30 -04:00
ginkgo . By ( "devices now unavailable on the local node" )
2021-11-10 11:21:39 -05:00
} )
2018-01-11 01:41:45 -05:00
2022-10-17 08:47:15 -04:00
ginkgo . It ( "Can schedule a pod that requires a device" , func ( ctx context . Context ) {
2023-03-13 05:42:30 -04:00
podRECMD := fmt . Sprintf ( "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s" , sleepIntervalWithRestart )
2023-02-21 07:50:33 -05:00
pod1 := e2epod . NewPodClient ( f ) . CreateSync ( ctx , makeBusyboxPod ( SampleDeviceResourceName , podRECMD ) )
2018-01-11 01:41:45 -05:00
deviceIDRE := "stub devices: (Dev-[0-9]+)"
2023-03-13 00:54:11 -04:00
devID1 , err := parseLog ( ctx , f , pod1 . Name , pod1 . Name , deviceIDRE )
framework . ExpectNoError ( err , "getting logs for pod %q" , pod1 . Name )
2023-03-13 04:32:32 -04:00
gomega . Expect ( devID1 ) . To ( gomega . Not ( gomega . Equal ( "" ) ) , "pod1 requested a device but started successfully without" )
2018-01-11 01:41:45 -05:00
2022-12-12 04:11:10 -05:00
v1alphaPodResources , err = getV1alpha1NodeDevices ( ctx )
2019-12-04 22:01:32 -05:00
framework . ExpectNoError ( err )
2020-10-10 19:03:31 -04:00
2022-12-12 04:11:10 -05:00
v1PodResources , err = getV1NodeDevices ( ctx )
2020-10-10 19:03:31 -04:00
framework . ExpectNoError ( err )
2022-10-19 08:50:26 -04:00
framework . Logf ( "v1alphaPodResources.PodResources:%+v\n" , v1alphaPodResources . PodResources )
framework . Logf ( "v1PodResources.PodResources:%+v\n" , v1PodResources . PodResources )
framework . Logf ( "len(v1alphaPodResources.PodResources):%+v" , len ( v1alphaPodResources . PodResources ) )
framework . Logf ( "len(v1PodResources.PodResources):%+v" , len ( v1PodResources . PodResources ) )
2023-10-09 04:42:42 -04:00
gomega . Expect ( v1alphaPodResources . PodResources ) . To ( gomega . HaveLen ( 2 ) )
gomega . Expect ( v1PodResources . PodResources ) . To ( gomega . HaveLen ( 2 ) )
2020-10-10 19:03:31 -04:00
var v1alphaResourcesForOurPod * kubeletpodresourcesv1alpha1 . PodResources
for _ , res := range v1alphaPodResources . GetPodResources ( ) {
if res . Name == pod1 . Name {
v1alphaResourcesForOurPod = res
}
}
var v1ResourcesForOurPod * kubeletpodresourcesv1 . PodResources
for _ , res := range v1PodResources . GetPodResources ( ) {
2018-08-28 02:41:42 -04:00
if res . Name == pod1 . Name {
2020-10-10 19:03:31 -04:00
v1ResourcesForOurPod = res
2018-08-28 02:41:42 -04:00
}
}
2020-10-10 19:03:31 -04:00
gomega . Expect ( v1alphaResourcesForOurPod ) . NotTo ( gomega . BeNil ( ) )
gomega . Expect ( v1ResourcesForOurPod ) . NotTo ( gomega . BeNil ( ) )
2023-10-09 04:42:42 -04:00
gomega . Expect ( v1alphaResourcesForOurPod . Name ) . To ( gomega . Equal ( pod1 . Name ) )
gomega . Expect ( v1ResourcesForOurPod . Name ) . To ( gomega . Equal ( pod1 . Name ) )
2020-10-10 19:03:31 -04:00
2023-10-09 04:42:42 -04:00
gomega . Expect ( v1alphaResourcesForOurPod . Namespace ) . To ( gomega . Equal ( pod1 . Namespace ) )
gomega . Expect ( v1ResourcesForOurPod . Namespace ) . To ( gomega . Equal ( pod1 . Namespace ) )
2020-10-10 19:03:31 -04:00
2023-10-09 04:42:42 -04:00
gomega . Expect ( v1alphaResourcesForOurPod . Containers ) . To ( gomega . HaveLen ( 1 ) )
gomega . Expect ( v1ResourcesForOurPod . Containers ) . To ( gomega . HaveLen ( 1 ) )
2020-10-10 19:03:31 -04:00
2023-10-09 04:42:42 -04:00
gomega . Expect ( v1alphaResourcesForOurPod . Containers [ 0 ] . Name ) . To ( gomega . Equal ( pod1 . Spec . Containers [ 0 ] . Name ) )
gomega . Expect ( v1ResourcesForOurPod . Containers [ 0 ] . Name ) . To ( gomega . Equal ( pod1 . Spec . Containers [ 0 ] . Name ) )
2020-10-10 19:03:31 -04:00
2023-10-09 04:42:42 -04:00
gomega . Expect ( v1alphaResourcesForOurPod . Containers [ 0 ] . Devices ) . To ( gomega . HaveLen ( 1 ) )
gomega . Expect ( v1ResourcesForOurPod . Containers [ 0 ] . Devices ) . To ( gomega . HaveLen ( 1 ) )
2020-10-10 19:03:31 -04:00
2023-10-09 04:42:42 -04:00
gomega . Expect ( v1alphaResourcesForOurPod . Containers [ 0 ] . Devices [ 0 ] . ResourceName ) . To ( gomega . Equal ( SampleDeviceResourceName ) )
gomega . Expect ( v1ResourcesForOurPod . Containers [ 0 ] . Devices [ 0 ] . ResourceName ) . To ( gomega . Equal ( SampleDeviceResourceName ) )
2020-10-10 19:03:31 -04:00
2023-10-09 04:42:42 -04:00
gomega . Expect ( v1alphaResourcesForOurPod . Containers [ 0 ] . Devices [ 0 ] . DeviceIds ) . To ( gomega . HaveLen ( 1 ) )
gomega . Expect ( v1ResourcesForOurPod . Containers [ 0 ] . Devices [ 0 ] . DeviceIds ) . To ( gomega . HaveLen ( 1 ) )
2021-11-10 11:21:39 -05:00
} )
2018-11-13 22:25:56 -05:00
2025-06-03 22:10:57 -04:00
f . It ( "can make a CDI device accessible in a container" , func ( ctx context . Context ) {
2023-10-23 06:47:30 -04:00
// check if CDI_DEVICE env variable is set
// and only one correspondent device node /tmp/<CDI_DEVICE> is available inside a container
podObj := makeBusyboxPod ( SampleDeviceResourceName , "[ $(ls /tmp/CDI-Dev-[1,2] | wc -l) -eq 1 -a -b /tmp/$CDI_DEVICE ]" )
podObj . Spec . RestartPolicy = v1 . RestartPolicyNever
pod := e2epod . NewPodClient ( f ) . Create ( ctx , podObj )
framework . ExpectNoError ( e2epod . WaitForPodSuccessInNamespace ( ctx , f . ClientSet , pod . Name , pod . Namespace ) )
} )
2023-03-09 13:20:46 -05:00
// simulate container restart, while all other involved components (kubelet, device plugin) stay stable. To do so, in the container
// entry point we sleep for a limited and short period of time. The device assignment should be kept and be stable across the container
// restarts. For the sake of brevity we however check just the fist restart.
2023-06-15 11:41:42 -04:00
ginkgo . It ( "Keeps device plugin assignments across pod restarts (no kubelet restart, no device plugin restart)" , func ( ctx context . Context ) {
2023-03-13 05:42:30 -04:00
podRECMD := fmt . Sprintf ( "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s" , sleepIntervalWithRestart )
2023-02-21 07:50:33 -05:00
pod1 := e2epod . NewPodClient ( f ) . CreateSync ( ctx , makeBusyboxPod ( SampleDeviceResourceName , podRECMD ) )
2021-11-10 11:21:39 -05:00
deviceIDRE := "stub devices: (Dev-[0-9]+)"
2023-03-13 00:54:11 -04:00
devID1 , err := parseLog ( ctx , f , pod1 . Name , pod1 . Name , deviceIDRE )
framework . ExpectNoError ( err , "getting logs for pod %q" , pod1 . Name )
2023-03-13 04:32:32 -04:00
gomega . Expect ( devID1 ) . To ( gomega . Not ( gomega . Equal ( "" ) ) , "pod1 requested a device but started successfully without" )
2021-11-10 11:21:39 -05:00
2023-03-13 00:54:11 -04:00
pod1 , err = e2epod . NewPodClient ( f ) . Get ( ctx , pod1 . Name , metav1 . GetOptions { } )
2018-01-11 01:41:45 -05:00
framework . ExpectNoError ( err )
2023-03-09 13:20:46 -05:00
ginkgo . By ( "Waiting for container to restart" )
2022-12-12 04:11:10 -05:00
ensurePodContainerRestart ( ctx , f , pod1 . Name , pod1 . Name )
2018-04-25 03:44:27 -04:00
2023-03-09 13:20:46 -05:00
// check from the device assignment is preserved and stable from perspective of the container
ginkgo . By ( "Confirming that after a container restart, fake-device assignment is kept" )
devIDRestart1 , err := parseLog ( ctx , f , pod1 . Name , pod1 . Name , deviceIDRE )
2023-03-13 00:54:11 -04:00
framework . ExpectNoError ( err , "getting logs for pod %q" , pod1 . Name )
2023-10-09 04:42:42 -04:00
gomega . Expect ( devIDRestart1 ) . To ( gomega . Equal ( devID1 ) )
2018-04-25 03:44:27 -04:00
2023-03-09 13:20:46 -05:00
// crosscheck from the device assignment is preserved and stable from perspective of the kubelet.
// needs to match the container perspective.
ginkgo . By ( "Verifying the device assignment after container restart using podresources API" )
v1PodResources , err = getV1NodeDevices ( ctx )
if err != nil {
framework . ExpectNoError ( err , "getting pod resources assignment after pod restart" )
}
2023-06-21 13:20:58 -04:00
err , _ = checkPodResourcesAssignment ( v1PodResources , pod1 . Namespace , pod1 . Name , pod1 . Spec . Containers [ 0 ] . Name , SampleDeviceResourceName , [ ] string { devID1 } )
2023-03-09 13:20:46 -05:00
framework . ExpectNoError ( err , "inconsistent device assignment after pod restart" )
2018-01-11 01:41:45 -05:00
2023-03-09 13:20:46 -05:00
ginkgo . By ( "Creating another pod" )
pod2 := e2epod . NewPodClient ( f ) . CreateSync ( ctx , makeBusyboxPod ( SampleDeviceResourceName , podRECMD ) )
err = e2epod . WaitTimeoutForPodRunningInNamespace ( ctx , f . ClientSet , pod2 . Name , f . Namespace . Name , 1 * time . Minute )
framework . ExpectNoError ( err )
2021-11-10 11:21:39 -05:00
2023-03-09 13:20:46 -05:00
ginkgo . By ( "Checking that pod got a fake device" )
devID2 , err := parseLog ( ctx , f , pod2 . Name , pod2 . Name , deviceIDRE )
framework . ExpectNoError ( err , "getting logs for pod %q" , pod2 . Name )
gomega . Expect ( devID2 ) . To ( gomega . Not ( gomega . Equal ( "" ) ) , "pod2 requested a device but started successfully without" )
ginkgo . By ( "Verifying the device assignment after extra container start using podresources API" )
v1PodResources , err = getV1NodeDevices ( ctx )
if err != nil {
framework . ExpectNoError ( err , "getting pod resources assignment after pod restart" )
}
2023-06-21 13:20:58 -04:00
err , _ = checkPodResourcesAssignment ( v1PodResources , pod1 . Namespace , pod1 . Name , pod1 . Spec . Containers [ 0 ] . Name , SampleDeviceResourceName , [ ] string { devID1 } )
2023-03-09 13:20:46 -05:00
framework . ExpectNoError ( err , "inconsistent device assignment after extra container restart - pod1" )
2023-06-21 13:20:58 -04:00
err , _ = checkPodResourcesAssignment ( v1PodResources , pod2 . Namespace , pod2 . Name , pod2 . Spec . Containers [ 0 ] . Name , SampleDeviceResourceName , [ ] string { devID2 } )
2023-03-09 13:20:46 -05:00
framework . ExpectNoError ( err , "inconsistent device assignment after extra container restart - pod2" )
2021-11-10 11:21:39 -05:00
} )
2018-01-11 01:41:45 -05:00
2023-06-15 11:41:42 -04:00
// simulate kubelet restart. A compliant device plugin is expected to re-register, while the pod and the container stays running.
// The flow with buggy or slow device plugin is deferred to another test.
2023-03-13 02:50:28 -04:00
// The device assignment should be kept and be stable across the kubelet restart, because it's the kubelet which performs the device allocation,
// and both the device plugin and the actual consumer (container) are stable.
2023-06-15 11:41:42 -04:00
ginkgo . It ( "Keeps device plugin assignments across kubelet restarts (no pod restart, no device plugin restart)" , func ( ctx context . Context ) {
2023-03-13 02:50:28 -04:00
podRECMD := fmt . Sprintf ( "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s" , sleepIntervalForever )
pod1 := e2epod . NewPodClient ( f ) . CreateSync ( ctx , makeBusyboxPod ( SampleDeviceResourceName , podRECMD ) )
deviceIDRE := "stub devices: (Dev-[0-9]+)"
devID1 , err := parseLog ( ctx , f , pod1 . Name , pod1 . Name , deviceIDRE )
framework . ExpectNoError ( err , "getting logs for pod %q" , pod1 . Name )
gomega . Expect ( devID1 ) . To ( gomega . Not ( gomega . Equal ( "" ) ) , "pod1 requested a device but started successfully without" )
pod1 , err = e2epod . NewPodClient ( f ) . Get ( ctx , pod1 . Name , metav1 . GetOptions { } )
framework . ExpectNoError ( err )
2023-06-21 13:20:58 -04:00
framework . Logf ( "testing pod: pre-restart UID=%s namespace=%s name=%s ready=%v" , pod1 . UID , pod1 . Namespace , pod1 . Name , podutils . IsPodReady ( pod1 ) )
2023-03-13 02:50:28 -04:00
ginkgo . By ( "Restarting Kubelet" )
2024-11-05 09:09:04 -05:00
restartKubelet ( ctx , true )
2023-03-13 02:50:28 -04:00
ginkgo . By ( "Wait for node to be ready again" )
e2enode . WaitForAllNodesSchedulable ( ctx , f . ClientSet , 5 * time . Minute )
ginkgo . By ( "Waiting for resource to become available on the local node after restart" )
gomega . Eventually ( ctx , func ( ) bool {
node , ready := getLocalTestNode ( ctx , f )
return ready &&
CountSampleDeviceCapacity ( node ) == expectedSampleDevsAmount &&
CountSampleDeviceAllocatable ( node ) == expectedSampleDevsAmount
2024-07-31 11:58:15 -04:00
} , 30 * time . Second , framework . Poll ) . Should ( gomega . BeTrueBecause ( "expected resource to be available after restart" ) )
2023-03-13 02:50:28 -04:00
2023-06-15 11:41:42 -04:00
ginkgo . By ( "Checking the same instance of the pod is still running" )
gomega . Eventually ( ctx , getPodByName ) .
2023-04-26 11:59:57 -04:00
WithArguments ( f , pod1 . Name ) .
WithTimeout ( time . Minute ) .
2023-06-15 11:41:42 -04:00
Should ( BeTheSamePodStillRunning ( pod1 ) ,
"the same pod instance not running across kubelet restarts, workload should not be perturbed by kubelet restarts" )
2023-06-21 13:20:58 -04:00
pod2 , err := e2epod . NewPodClient ( f ) . Get ( ctx , pod1 . Name , metav1 . GetOptions { } )
2023-06-15 11:41:42 -04:00
framework . ExpectNoError ( err )
2023-06-21 13:20:58 -04:00
framework . Logf ( "testing pod: post-restart UID=%s namespace=%s name=%s ready=%v" , pod2 . UID , pod2 . Namespace , pod2 . Name , podutils . IsPodReady ( pod2 ) )
2023-03-13 02:50:28 -04:00
// crosscheck from the device assignment is preserved and stable from perspective of the kubelet.
// note we don't check again the logs of the container: the check is done at startup, the container
// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
// is useless.
ginkgo . By ( "Verifying the device assignment after kubelet restart using podresources API" )
gomega . Eventually ( ctx , func ( ) error {
v1PodResources , err = getV1NodeDevices ( ctx )
return err
} , 30 * time . Second , framework . Poll ) . ShouldNot ( gomega . HaveOccurred ( ) , "cannot fetch the compute resource assignment after kubelet restart" )
2023-06-21 13:20:58 -04:00
err , _ = checkPodResourcesAssignment ( v1PodResources , pod2 . Namespace , pod2 . Name , pod2 . Spec . Containers [ 0 ] . Name , SampleDeviceResourceName , [ ] string { devID1 } )
2023-03-13 02:50:28 -04:00
framework . ExpectNoError ( err , "inconsistent device assignment after pod restart" )
} )
2023-06-15 11:41:42 -04:00
// simulate kubelet and container restart, *but not* device plugin restart.
2023-04-27 06:26:50 -04:00
// The device assignment should be kept and be stable across the kubelet and container restart, because it's the kubelet which
// performs the device allocation, and both the device plugin is stable.
2023-06-15 11:41:42 -04:00
ginkgo . It ( "Keeps device plugin assignments across pod and kubelet restarts (no device plugin restart)" , func ( ctx context . Context ) {
2023-04-27 06:26:50 -04:00
podRECMD := fmt . Sprintf ( "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s" , sleepIntervalWithRestart )
pod1 := e2epod . NewPodClient ( f ) . CreateSync ( ctx , makeBusyboxPod ( SampleDeviceResourceName , podRECMD ) )
deviceIDRE := "stub devices: (Dev-[0-9]+)"
devID1 , err := parseLog ( ctx , f , pod1 . Name , pod1 . Name , deviceIDRE )
framework . ExpectNoError ( err , "getting logs for pod %q" , pod1 . Name )
gomega . Expect ( devID1 ) . To ( gomega . Not ( gomega . Equal ( "" ) ) , "pod1 requested a device but started successfully without" )
pod1 , err = e2epod . NewPodClient ( f ) . Get ( ctx , pod1 . Name , metav1 . GetOptions { } )
framework . ExpectNoError ( err )
ginkgo . By ( "Wait for node to be ready again" )
e2enode . WaitForAllNodesSchedulable ( ctx , f . ClientSet , 5 * time . Minute )
ginkgo . By ( "Waiting for container to restart" )
ensurePodContainerRestart ( ctx , f , pod1 . Name , pod1 . Name )
ginkgo . By ( "Confirming that after a container restart, fake-device assignment is kept" )
devIDRestart1 , err := parseLog ( ctx , f , pod1 . Name , pod1 . Name , deviceIDRE )
framework . ExpectNoError ( err , "getting logs for pod %q" , pod1 . Name )
2023-10-09 04:42:42 -04:00
gomega . Expect ( devIDRestart1 ) . To ( gomega . Equal ( devID1 ) )
2023-04-27 06:26:50 -04:00
ginkgo . By ( "Restarting Kubelet" )
2024-11-05 09:09:04 -05:00
restartKubelet ( ctx , true )
2023-04-27 06:26:50 -04:00
ginkgo . By ( "Wait for node to be ready again" )
e2enode . WaitForAllNodesSchedulable ( ctx , f . ClientSet , 5 * time . Minute )
2023-06-15 11:41:42 -04:00
ginkgo . By ( "Checking an instance of the pod is running" )
gomega . Eventually ( ctx , getPodByName ) .
2023-04-27 06:26:50 -04:00
WithArguments ( f , pod1 . Name ) .
2023-09-22 12:37:32 -04:00
// The kubelet restarts pod with an exponential back-off delay, with a maximum cap of 5 minutes.
// Allow 5 minutes and 10 seconds for the pod to start in a slow environment.
WithTimeout ( 5 * time . Minute + 10 * time . Second ) .
2023-06-15 11:41:42 -04:00
Should ( gomega . And (
BeAPodInPhase ( v1 . PodRunning ) ,
BeAPodReady ( ) ,
) ,
"the pod should still be running, the workload should not be perturbed by kubelet restarts" )
ginkgo . By ( "Verifying the device assignment after pod and kubelet restart using container logs" )
var devID1Restarted string
gomega . Eventually ( ctx , func ( ) string {
devID1Restarted , err = parseLog ( ctx , f , pod1 . Name , pod1 . Name , deviceIDRE )
if err != nil {
framework . Logf ( "error getting logds for pod %q: %v" , pod1 . Name , err )
return ""
}
return devID1Restarted
} , 30 * time . Second , framework . Poll ) . Should ( gomega . Equal ( devID1 ) , "pod %s reports a different device after restarts: %s (expected %s)" , pod1 . Name , devID1Restarted , devID1 )
2023-04-27 06:26:50 -04:00
2023-06-15 11:41:42 -04:00
ginkgo . By ( "Verifying the device assignment after pod and kubelet restart using podresources API" )
2023-04-27 06:26:50 -04:00
gomega . Eventually ( ctx , func ( ) error {
v1PodResources , err = getV1NodeDevices ( ctx )
return err
} , 30 * time . Second , framework . Poll ) . ShouldNot ( gomega . HaveOccurred ( ) , "cannot fetch the compute resource assignment after kubelet restart" )
2023-06-21 13:20:58 -04:00
err , _ = checkPodResourcesAssignment ( v1PodResources , pod1 . Namespace , pod1 . Name , pod1 . Spec . Containers [ 0 ] . Name , SampleDeviceResourceName , [ ] string { devID1 } )
2023-04-27 06:26:50 -04:00
framework . ExpectNoError ( err , "inconsistent device assignment after pod restart" )
} )
2024-07-24 04:49:18 -04:00
ginkgo . It ( "will not attempt to admit the succeeded pod after the kubelet restart and device plugin removed" , func ( ctx context . Context ) {
podRECMD := fmt . Sprintf ( "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s" , sleepIntervalToCompletion )
podSpec := makeBusyboxPod ( SampleDeviceResourceName , podRECMD )
podSpec . Spec . RestartPolicy = v1 . RestartPolicyNever
// Making sure the pod will not be garbage collected and will stay thru the kubelet restart after
// it reached the terminated state. Using finalizers makes the test more reliable.
podSpec . ObjectMeta . Finalizers = [ ] string { testFinalizer }
pod := e2epod . NewPodClient ( f ) . CreateSync ( ctx , podSpec )
deviceIDRE := "stub devices: (Dev-[0-9]+)"
devID1 , err := parseLog ( ctx , f , pod . Name , pod . Name , deviceIDRE )
framework . ExpectNoError ( err , "getting logs for pod %q" , pod . Name )
gomega . Expect ( devID1 ) . To ( gomega . Not ( gomega . Equal ( "" ) ) , "pod requested a device but started successfully without" )
pod , err = e2epod . NewPodClient ( f ) . Get ( ctx , pod . Name , metav1 . GetOptions { } )
framework . ExpectNoError ( err )
ginkgo . By ( "Wait for node to be ready" )
gomega . Expect ( e2enode . WaitForAllNodesSchedulable ( ctx , f . ClientSet , 5 * time . Minute ) ) . To ( gomega . Succeed ( ) )
ginkgo . By ( "Waiting for pod to succeed" )
gomega . Expect ( e2epod . WaitForPodSuccessInNamespace ( ctx , f . ClientSet , pod . Name , pod . Namespace ) ) . To ( gomega . Succeed ( ) )
ginkgo . By ( "Deleting the device plugin" )
2025-01-20 11:26:04 -05:00
e2epod . NewPodClient ( f ) . DeleteSync ( ctx , devicePluginPod . Name , metav1 . DeleteOptions { } , f . Timeouts . PodDelete )
2024-07-24 04:49:18 -04:00
waitForContainerRemoval ( ctx , devicePluginPod . Spec . Containers [ 0 ] . Name , devicePluginPod . Name , devicePluginPod . Namespace )
gomega . Eventually ( getNodeResourceValues , devicePluginGracefulTimeout , f . Timeouts . Poll ) . WithContext ( ctx ) . WithArguments ( SampleDeviceResourceName ) . Should ( gomega . Equal ( ResourceValue { Allocatable : 0 , Capacity : int ( expectedSampleDevsAmount ) } ) )
ginkgo . By ( "Restarting Kubelet" )
2024-11-05 09:09:04 -05:00
restartKubelet ( ctx , true )
2024-07-24 04:49:18 -04:00
ginkgo . By ( "Wait for node to be ready again" )
gomega . Expect ( e2enode . WaitForAllNodesSchedulable ( ctx , f . ClientSet , 5 * time . Minute ) ) . To ( gomega . Succeed ( ) )
ginkgo . By ( "Pod should still be in Succeed state" )
// This ensures that the pod was admitted successfully.
// In the past we had and issue when kubelet will attempt to re-admit the terminated pod and will change it's phase to Failed.
// There are no indication that the pod was re-admitted so we just wait for a minute after the node became ready.
gomega . Consistently ( func ( ) v1 . PodPhase {
pod , err = f . ClientSet . CoreV1 ( ) . Pods ( f . Namespace . Name ) . Get ( ctx , pod . Name , metav1 . GetOptions { } )
return pod . Status . Phase
} , 1 * time . Minute , f . Timeouts . Poll ) . Should ( gomega . Equal ( v1 . PodSucceeded ) )
ginkgo . By ( "Removing the finalizer from the pod so it can be deleted now" )
e2epod . NewPodClient ( f ) . RemoveFinalizer ( context . TODO ( ) , podSpec . Name , testFinalizer )
} )
2023-04-27 06:26:50 -04:00
// simulate device plugin re-registration, *but not* container and kubelet restart.
// After the device plugin has re-registered, the list healthy devices is repopulated based on the devices discovered.
// Once Pod2 is running we determine the device that was allocated it. As long as the device allocation succeeds the
// test should pass.
2023-06-15 11:41:42 -04:00
ginkgo . It ( "Keeps device plugin assignments after the device plugin has restarted (no kubelet restart, pod restart)" , func ( ctx context . Context ) {
2023-04-27 06:26:50 -04:00
podRECMD := fmt . Sprintf ( "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s" , sleepIntervalForever )
pod1 := e2epod . NewPodClient ( f ) . CreateSync ( ctx , makeBusyboxPod ( SampleDeviceResourceName , podRECMD ) )
deviceIDRE := "stub devices: (Dev-[0-9]+)"
devID1 , err := parseLog ( ctx , f , pod1 . Name , pod1 . Name , deviceIDRE )
framework . ExpectNoError ( err , "getting logs for pod %q" , pod1 . Name )
gomega . Expect ( devID1 ) . To ( gomega . Not ( gomega . Equal ( "" ) ) , "pod1 requested a device but started successfully without" )
pod1 , err = e2epod . NewPodClient ( f ) . Get ( ctx , pod1 . Name , metav1 . GetOptions { } )
framework . ExpectNoError ( err )
ginkgo . By ( "Wait for node to be ready again" )
e2enode . WaitForAllNodesSchedulable ( ctx , f . ClientSet , 5 * time . Minute )
ginkgo . By ( "Re-Register resources and delete the plugin pod" )
gp := int64 ( 0 )
deleteOptions := metav1 . DeleteOptions {
GracePeriodSeconds : & gp ,
}
2025-01-20 11:26:04 -05:00
e2epod . NewPodClient ( f ) . DeleteSync ( ctx , devicePluginPod . Name , deleteOptions , f . Timeouts . PodDelete )
2023-04-27 06:26:50 -04:00
waitForContainerRemoval ( ctx , devicePluginPod . Spec . Containers [ 0 ] . Name , devicePluginPod . Name , devicePluginPod . Namespace )
ginkgo . By ( "Recreating the plugin pod" )
devicePluginPod = e2epod . NewPodClient ( f ) . CreateSync ( ctx , dptemplate )
err = e2epod . WaitTimeoutForPodRunningInNamespace ( ctx , f . ClientSet , devicePluginPod . Name , devicePluginPod . Namespace , 1 * time . Minute )
framework . ExpectNoError ( err )
ginkgo . By ( "Waiting for resource to become available on the local node after re-registration" )
gomega . Eventually ( ctx , func ( ) bool {
node , ready := getLocalTestNode ( ctx , f )
return ready &&
CountSampleDeviceCapacity ( node ) == expectedSampleDevsAmount &&
CountSampleDeviceAllocatable ( node ) == expectedSampleDevsAmount
2024-07-31 11:58:15 -04:00
} , 30 * time . Second , framework . Poll ) . Should ( gomega . BeTrueBecause ( "expected resource to be available after re-registration" ) )
2023-04-27 06:26:50 -04:00
2023-06-15 11:41:42 -04:00
// crosscheck that after device plugin restart the device assignment is preserved and
2023-04-27 06:26:50 -04:00
// stable from the kubelet's perspective.
// note we don't check again the logs of the container: the check is done at startup, the container
// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
// is useless.
2023-06-15 11:41:42 -04:00
ginkgo . By ( "Verifying the device assignment after device plugin restart using podresources API" )
2023-04-27 06:26:50 -04:00
gomega . Eventually ( ctx , func ( ) error {
v1PodResources , err = getV1NodeDevices ( ctx )
return err
} , 30 * time . Second , framework . Poll ) . ShouldNot ( gomega . HaveOccurred ( ) , "cannot fetch the compute resource assignment after kubelet restart" )
2023-06-21 13:20:58 -04:00
err , _ = checkPodResourcesAssignment ( v1PodResources , pod1 . Namespace , pod1 . Name , pod1 . Spec . Containers [ 0 ] . Name , SampleDeviceResourceName , [ ] string { devID1 } )
2023-04-27 06:26:50 -04:00
framework . ExpectNoError ( err , "inconsistent device assignment after pod restart" )
} )
2023-06-15 11:41:42 -04:00
// simulate kubelet restart *and* device plugin restart, while the pod and the container stays running.
// The device assignment should be kept and be stable across the kubelet/device plugin restart, as both the aforementioned components
// orchestrate the device allocation: the actual consumer (container) is stable.
ginkgo.It("Keeps device plugin assignments after kubelet restart and device plugin restart (no pod restart)", func(ctx context.Context) {
	podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalForever) // the pod has to run "forever" in the timescale of this test
	pod1 := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))

	// The container echoes the device it was assigned; recover it from the logs.
	deviceIDRE := "stub devices: (Dev-[0-9]+)"
	devID1, err := parseLog(ctx, f, pod1.Name, pod1.Name, deviceIDRE)
	framework.ExpectNoError(err, "getting logs for pod %q", pod1.Name)
	gomega.Expect(devID1).To(gomega.Not(gomega.BeEmpty()), "pod1 requested a device but started successfully without")

	// Refresh the pod object so later identity checks compare against the current UID.
	pod1, err = e2epod.NewPodClient(f).Get(ctx, pod1.Name, metav1.GetOptions{})
	framework.ExpectNoError(err)

	ginkgo.By("Restarting Kubelet")
	restartKubelet(ctx, true)

	ginkgo.By("Wait for node to be ready again")
	e2enode.WaitForAllNodesSchedulable(ctx, f.ClientSet, 5*time.Minute)

	ginkgo.By("Checking the same instance of the pod is still running after kubelet restart")
	gomega.Eventually(ctx, getPodByName).
		WithArguments(f, pod1.Name).
		WithTimeout(time.Minute).
		Should(BeTheSamePodStillRunning(pod1),
			"the same pod instance not running across kubelet restarts, workload should not be perturbed by kubelet restarts")

	// Crosscheck that the device assignment is preserved and stable from the kubelet's perspective.
	// note we don't check again the logs of the container: the check is done at startup, the container
	// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
	// is useless.
	ginkgo.By("Verifying the device assignment after kubelet restart using podresources API")
	gomega.Eventually(ctx, func() error {
		v1PodResources, err = getV1NodeDevices(ctx)
		return err
	}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")

	err, _ = checkPodResourcesAssignment(v1PodResources, pod1.Namespace, pod1.Name, pod1.Spec.Containers[0].Name, SampleDeviceResourceName, []string{devID1})
	framework.ExpectNoError(err, "inconsistent device assignment after pod restart")

	// Now restart the device plugin itself: force-delete its pod and recreate it from the template.
	ginkgo.By("Re-Register resources by deleting the plugin pod")
	gp := int64(0)
	deleteOptions := metav1.DeleteOptions{
		GracePeriodSeconds: &gp, // zero grace period: delete immediately
	}
	e2epod.NewPodClient(f).DeleteSync(ctx, devicePluginPod.Name, deleteOptions, f.Timeouts.PodDelete)
	waitForContainerRemoval(ctx, devicePluginPod.Spec.Containers[0].Name, devicePluginPod.Name, devicePluginPod.Namespace)

	ginkgo.By("Recreating the plugin pod")
	devicePluginPod = e2epod.NewPodClient(f).CreateSync(ctx, dptemplate)

	ginkgo.By("Waiting for resource to become available on the local node after restart")
	// Tolerate transient node-readiness flips here: only the eventual condition
	// (node ready AND all sample devices reported) matters.
	gomega.Eventually(ctx, func() bool {
		node, ready := getLocalTestNode(ctx, f)
		return ready &&
			CountSampleDeviceCapacity(node) == expectedSampleDevsAmount &&
			CountSampleDeviceAllocatable(node) == expectedSampleDevsAmount
	}, 30*time.Second, framework.Poll).Should(gomega.BeTrueBecause("expected resource to be available after restart"))

	ginkgo.By("Checking the same instance of the pod is still running after the device plugin restart")
	gomega.Eventually(ctx, getPodByName).
		WithArguments(f, pod1.Name).
		WithTimeout(time.Minute).
		Should(BeTheSamePodStillRunning(pod1),
			"the same pod instance not running across kubelet restarts, workload should not be perturbed by device plugins restarts")
})
2023-06-21 13:20:58 -04:00
ginkgo.It("[OrphanedPods] Ensures pods consuming devices deleted while kubelet is down are cleaned up correctly", func(ctx context.Context) {
	podRECMD := fmt.Sprintf("devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s", sleepIntervalWithRestart)
	pod := e2epod.NewPodClient(f).CreateSync(ctx, makeBusyboxPod(SampleDeviceResourceName, podRECMD))

	// The container echoes the device it was assigned; recover it from the logs.
	deviceIDRE := "stub devices: (Dev-[0-9]+)"
	devID, err := parseLog(ctx, f, pod.Name, pod.Name, deviceIDRE)
	framework.ExpectNoError(err, "getting logs for pod %q", pod.Name)
	gomega.Expect(devID).To(gomega.Not(gomega.BeEmpty()), "pod1 requested a device but started successfully without")

	pod, err = e2epod.NewPodClient(f).Get(ctx, pod.Name, metav1.GetOptions{})
	framework.ExpectNoError(err)

	ginkgo.By("stopping the kubelet")
	// NOTE: this local variable shadows the package-level restartKubelet helper used by the
	// other tests; here it is the restart closure returned by mustStopKubelet.
	restartKubelet := mustStopKubelet(ctx, f)

	// wait until the kubelet health check will fail
	gomega.Eventually(ctx, func() bool {
		ok := kubeletHealthCheck(kubeletHealthCheckURL)
		framework.Logf("kubelet health check at %q value=%v", kubeletHealthCheckURL, ok)
		return ok
	}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeFalseBecause("expected kubelet health check to be failed"))

	framework.Logf("Delete the pod while the kubelet is not running")
	// Delete pod sync by name will force delete the pod, removing it from kubelet's config
	deletePodSyncByName(ctx, f, pod.Name)

	framework.Logf("Starting the kubelet")
	restartKubelet(ctx)

	// wait until the kubelet health check will succeed
	gomega.Eventually(ctx, func() bool {
		ok := kubeletHealthCheck(kubeletHealthCheckURL)
		framework.Logf("kubelet health check at %q value=%v", kubeletHealthCheckURL, ok)
		return ok
	}, f.Timeouts.PodStart, f.Timeouts.Poll).Should(gomega.BeTrueBecause("expected kubelet to be in healthy state"))

	// The restarted kubelet must notice the pod was deleted while it was down and clean it up.
	framework.Logf("wait for the pod %v to disappear", pod.Name)
	gomega.Eventually(ctx, func(ctx context.Context) error {
		err := checkMirrorPodDisappear(ctx, f.ClientSet, pod.Name, pod.Namespace)
		framework.Logf("pod %s/%s disappear check err=%v", pod.Namespace, pod.Name, err)
		return err
	}, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.BeNil())

	waitForAllContainerRemoval(ctx, pod.Name, pod.Namespace)

	// After cleanup, the podresources API must NOT report any assignment for the deleted pod;
	// a successful lookup or a reported allocation means a stale assignment leaked.
	ginkgo.By("Verifying the device assignment after device plugin restart using podresources API")
	gomega.Eventually(ctx, func() error {
		v1PodResources, err = getV1NodeDevices(ctx)
		return err
	}, 30*time.Second, framework.Poll).ShouldNot(gomega.HaveOccurred(), "cannot fetch the compute resource assignment after kubelet restart")
	err, allocated := checkPodResourcesAssignment(v1PodResources, pod.Namespace, pod.Name, pod.Spec.Containers[0].Name, SampleDeviceResourceName, []string{})
	if err == nil || allocated {
		framework.Fail(fmt.Sprintf("stale device assignment after pod deletion while kubelet was down allocated=%v error=%v", allocated, err))
	}
})
2023-09-06 10:19:50 -04:00
2024-12-11 16:11:51 -05:00
f.It("Can schedule a pod with a restartable init container", feature.SidecarContainers, func(ctx context.Context) {
	cmdTemplate := "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s"
	shortSleep := "1s"
	devRequest := v1.ResourceList{v1.ResourceName(SampleDeviceResourceName): *resource.NewQuantity(1, resource.DecimalSI)}

	// One short-lived regular init container, one restartable (sidecar-style) init container,
	// and one regular container — each requesting a single sample device.
	podDef := &v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "device-plugin-test-" + string(uuid.NewUUID())},
		Spec: v1.PodSpec{
			RestartPolicy: v1.RestartPolicyAlways,
			InitContainers: []v1.Container{
				{
					Image:   busyboxImage,
					Name:    "init-1",
					Command: []string{"sh", "-c", fmt.Sprintf(cmdTemplate, shortSleep)},
					Resources: v1.ResourceRequirements{
						Limits:   devRequest,
						Requests: devRequest,
					},
				},
				{
					Image:   busyboxImage,
					Name:    "restartable-init-2",
					Command: []string{"sh", "-c", fmt.Sprintf(cmdTemplate, sleepIntervalForever)},
					Resources: v1.ResourceRequirements{
						Limits:   devRequest,
						Requests: devRequest,
					},
					RestartPolicy: &containerRestartPolicyAlways,
				},
			},
			Containers: []v1.Container{{
				Image:   busyboxImage,
				Name:    "regular-1",
				Command: []string{"sh", "-c", fmt.Sprintf(cmdTemplate, sleepIntervalForever)},
				Resources: v1.ResourceRequirements{
					Limits:   devRequest,
					Requests: devRequest,
				},
			}},
		},
	}
	testPod := e2epod.NewPodClient(f).CreateSync(ctx, podDef)

	// Each container echoes the device it received; collect all three from the logs.
	devRE := "stub devices: (Dev-[0-9]+)"
	initDevID, err := parseLog(ctx, f, testPod.Name, testPod.Spec.InitContainers[0].Name, devRE)
	framework.ExpectNoError(err, "getting logs for pod %q/%q", testPod.Name, testPod.Spec.InitContainers[0].Name)
	gomega.Expect(initDevID).ToNot(gomega.Equal(""), "pod1's init container requested a device but started successfully without")

	sidecarDevID, err := parseLog(ctx, f, testPod.Name, testPod.Spec.InitContainers[1].Name, devRE)
	framework.ExpectNoError(err, "getting logs for pod %q/%q", testPod.Name, testPod.Spec.InitContainers[1].Name)
	gomega.Expect(sidecarDevID).ToNot(gomega.Equal(""), "pod1's restartable init container requested a device but started successfully without")
	gomega.Expect(sidecarDevID).To(gomega.Equal(initDevID), "pod1's init container and restartable init container should share the same device")

	mainDevID, err := parseLog(ctx, f, testPod.Name, testPod.Spec.Containers[0].Name, devRE)
	framework.ExpectNoError(err, "getting logs for pod %q/%q", testPod.Name, testPod.Spec.Containers[0].Name)
	gomega.Expect(mainDevID).ToNot(gomega.Equal(""), "pod1's regular container requested a device but started successfully without")
	gomega.Expect(mainDevID).NotTo(gomega.Equal(sidecarDevID), "pod1's restartable init container and regular container should not share the same device")

	// Crosscheck the assignments through the podresources API.
	podResources, err := getV1NodeDevices(ctx)
	framework.ExpectNoError(err)
	framework.Logf("PodResources.PodResources:%+v\n", podResources.PodResources)
	framework.Logf("len(PodResources.PodResources):%+v", len(podResources.PodResources))
	gomega.Expect(podResources.PodResources).To(gomega.HaveLen(2))

	var ourPodResources *kubeletpodresourcesv1.PodResources
	for _, pr := range podResources.GetPodResources() {
		if pr.Name == testPod.Name {
			ourPodResources = pr
		}
	}
	gomega.Expect(ourPodResources).NotTo(gomega.BeNil())
	gomega.Expect(ourPodResources.Name).To(gomega.Equal(testPod.Name))
	gomega.Expect(ourPodResources.Namespace).To(gomega.Equal(testPod.Namespace))

	// Exactly two containers must be reported: the restartable init container and the
	// regular container, each holding exactly one sample device.
	gomega.Expect(ourPodResources.Containers).To(gomega.HaveLen(2))
	for _, cnt := range ourPodResources.Containers {
		switch cnt.Name {
		case testPod.Spec.InitContainers[1].Name:
			gomega.Expect(cnt.Devices).To(gomega.HaveLen(1))
			gomega.Expect(cnt.Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))
			gomega.Expect(cnt.Devices[0].DeviceIds).To(gomega.HaveLen(1))
		case testPod.Spec.Containers[0].Name:
			gomega.Expect(cnt.Devices).To(gomega.HaveLen(1))
			gomega.Expect(cnt.Devices[0].ResourceName).To(gomega.Equal(SampleDeviceResourceName))
			gomega.Expect(cnt.Devices[0].DeviceIds).To(gomega.HaveLen(1))
		default:
			framework.Failf("unexpected container name: %s", cnt.Name)
		}
	}
})
2018-01-11 01:41:45 -05:00
} )
2018-01-24 12:06:07 -05:00
}
2018-01-11 01:41:45 -05:00
2023-04-26 12:41:14 -04:00
func testDevicePluginNodeReboot ( f * framework . Framework , pluginSockDir string ) {
2023-06-20 04:27:14 -04:00
f . Context ( "DevicePlugin" , f . WithSerial ( ) , f . WithDisruptive ( ) , func ( ) {
2023-04-26 12:41:14 -04:00
var devicePluginPod * v1 . Pod
var v1PodResources * kubeletpodresourcesv1 . ListPodResourcesResponse
var triggerPathFile , triggerPathDir string
var err error
2023-06-15 11:41:42 -04:00
2023-04-26 12:41:14 -04:00
ginkgo . BeforeEach ( func ( ctx context . Context ) {
ginkgo . By ( "Wait for node to be ready" )
2024-08-26 05:32:13 -04:00
gomega . Eventually ( ctx , e2enode . TotalReady ) .
WithArguments ( f . ClientSet ) .
WithTimeout ( time . Minute ) .
Should ( gomega . BeEquivalentTo ( 1 ) )
2023-04-26 12:41:14 -04:00
// Before we run the device plugin test, we need to ensure
// that the cluster is in a clean state and there are no
// pods running on this node.
// This is done in a gomega.Eventually with retries since a prior test in a different test suite could've run and the deletion of it's resources may still be in progress.
// xref: https://issue.k8s.io/115381
gomega . Eventually ( ctx , func ( ctx context . Context ) error {
v1PodResources , err = getV1NodeDevices ( ctx )
if err != nil {
return fmt . Errorf ( "failed to get node local podresources by accessing the (v1) podresources API endpoint: %v" , err )
}
if len ( v1PodResources . PodResources ) > 0 {
return fmt . Errorf ( "expected v1 pod resources to be empty, but got non-empty resources: %+v" , v1PodResources . PodResources )
}
return nil
2023-06-15 11:41:42 -04:00
} , f . Timeouts . SystemDaemonsetStartup , f . Timeouts . Poll ) . Should ( gomega . Succeed ( ) )
2023-04-26 12:41:14 -04:00
2023-06-21 13:20:05 -04:00
ginkgo . By ( "Setting up the directory for controlling registration" )
2023-04-26 12:41:14 -04:00
triggerPathDir = filepath . Join ( devicePluginDir , "sample" )
2023-06-21 13:20:05 -04:00
if _ , err := os . Stat ( triggerPathDir ) ; err != nil {
if errors . Is ( err , os . ErrNotExist ) {
if err := os . Mkdir ( triggerPathDir , os . ModePerm ) ; err != nil {
framework . Fail ( fmt . Sprintf ( "registration control directory %q creation failed: %v " , triggerPathDir , err ) )
}
framework . Logf ( "registration control directory created successfully" )
} else {
framework . Fail ( fmt . Sprintf ( "unexpected error checking %q: %v" , triggerPathDir , err ) )
2023-04-26 12:41:14 -04:00
}
2023-06-21 13:20:05 -04:00
} else {
framework . Logf ( "registration control directory %q already present" , triggerPathDir )
}
ginkgo . By ( "Setting up the file trigger for controlling registration" )
triggerPathFile = filepath . Join ( triggerPathDir , "registration" )
if _ , err := os . Stat ( triggerPathFile ) ; err != nil {
if errors . Is ( err , os . ErrNotExist ) {
if _ , err = os . Create ( triggerPathFile ) ; err != nil {
framework . Fail ( fmt . Sprintf ( "registration control file %q creation failed: %v" , triggerPathFile , err ) )
2023-04-26 12:41:14 -04:00
}
2023-06-21 13:20:05 -04:00
framework . Logf ( "registration control file created successfully" )
} else {
framework . Fail ( fmt . Sprintf ( "unexpected error creating %q: %v" , triggerPathFile , err ) )
2023-04-26 12:41:14 -04:00
}
2023-06-21 13:20:05 -04:00
} else {
framework . Logf ( "registration control file %q already present" , triggerPathFile )
2023-04-26 12:41:14 -04:00
}
ginkgo . By ( "Scheduling a sample device plugin pod" )
data , err := e2etestfiles . Read ( SampleDevicePluginControlRegistrationDSYAML )
if err != nil {
2023-06-21 13:20:05 -04:00
framework . Fail ( fmt . Sprintf ( "error reading test data %q: %v" , SampleDevicePluginControlRegistrationDSYAML , err ) )
2023-04-26 12:41:14 -04:00
}
ds := readDaemonSetV1OrDie ( data )
dp := & v1 . Pod {
ObjectMeta : metav1 . ObjectMeta {
Name : SampleDevicePluginName ,
} ,
Spec : ds . Spec . Template . Spec ,
}
devicePluginPod = e2epod . NewPodClient ( f ) . CreateSync ( ctx , dp )
go func ( ) {
// Since autoregistration is disabled for the device plugin (as REGISTER_CONTROL_FILE
// environment variable is specified), device plugin registration needs to be triggerred
// manually.
// This is done by deleting the control file at the following path:
// `/var/lib/kubelet/device-plugins/sample/registration`.
defer ginkgo . GinkgoRecover ( )
framework . Logf ( "Deleting the control file: %q to trigger registration" , triggerPathFile )
err := os . Remove ( triggerPathFile )
framework . ExpectNoError ( err )
} ( )
ginkgo . By ( "Waiting for devices to become available on the local node" )
gomega . Eventually ( ctx , func ( ctx context . Context ) bool {
node , ready := getLocalTestNode ( ctx , f )
return ready && CountSampleDeviceCapacity ( node ) > 0
2024-07-31 11:58:15 -04:00
} , 5 * time . Minute , framework . Poll ) . Should ( gomega . BeTrueBecause ( "expected devices to be available on the local node" ) )
2023-04-26 12:41:14 -04:00
framework . Logf ( "Successfully created device plugin pod" )
ginkgo . By ( fmt . Sprintf ( "Waiting for the resource exported by the sample device plugin to become available on the local node (instances: %d)" , expectedSampleDevsAmount ) )
gomega . Eventually ( ctx , func ( ctx context . Context ) bool {
node , ready := getLocalTestNode ( ctx , f )
return ready &&
CountSampleDeviceCapacity ( node ) == expectedSampleDevsAmount &&
CountSampleDeviceAllocatable ( node ) == expectedSampleDevsAmount
2024-07-31 11:58:15 -04:00
} , 30 * time . Second , framework . Poll ) . Should ( gomega . BeTrueBecause ( "expected resource to be available on local node" ) )
2023-04-26 12:41:14 -04:00
} )
ginkgo . AfterEach ( func ( ctx context . Context ) {
ginkgo . By ( "Deleting the device plugin pod" )
2025-01-20 11:26:04 -05:00
e2epod . NewPodClient ( f ) . DeleteSync ( ctx , devicePluginPod . Name , metav1 . DeleteOptions { } , f . Timeouts . PodDelete )
2023-04-26 12:41:14 -04:00
ginkgo . By ( "Deleting any Pods created by the test" )
l , err := e2epod . NewPodClient ( f ) . List ( ctx , metav1 . ListOptions { } )
framework . ExpectNoError ( err )
for _ , p := range l . Items {
if p . Namespace != f . Namespace . Name {
continue
}
2024-07-24 04:49:18 -04:00
ginkgo . By ( "Removing the finalizer from the pod in case it was used" )
e2epod . NewPodClient ( f ) . RemoveFinalizer ( context . TODO ( ) , p . Name , testFinalizer )
2023-04-26 12:41:14 -04:00
framework . Logf ( "Deleting pod: %s" , p . Name )
2025-01-20 11:26:04 -05:00
e2epod . NewPodClient ( f ) . DeleteSync ( ctx , p . Name , metav1 . DeleteOptions { } , f . Timeouts . PodDelete )
2023-04-26 12:41:14 -04:00
}
err = os . Remove ( triggerPathDir )
framework . ExpectNoError ( err )
ginkgo . By ( "Waiting for devices to become unavailable on the local node" )
gomega . Eventually ( ctx , func ( ctx context . Context ) bool {
node , ready := getLocalTestNode ( ctx , f )
return ready && CountSampleDeviceCapacity ( node ) <= 0
2024-07-31 11:58:15 -04:00
} , 5 * time . Minute , framework . Poll ) . Should ( gomega . BeTrueBecause ( "expected devices to be unavailable on local node" ) )
2023-04-26 12:41:14 -04:00
ginkgo . By ( "devices now unavailable on the local node" )
} )
// simulate node reboot scenario by removing pods using CRI before kubelet is started. In addition to that,
// intentionally a scenario is created where after node reboot, application pods requesting devices appear before the device plugin pod
2023-06-15 11:41:42 -04:00
// exposing those devices as resource has restarted. The expected behavior is that the application pod fails at admission time.
2025-01-29 06:05:58 -05:00
framework . It ( "Does not keep device plugin assignments across node reboots if fails admission (no pod restart, no device plugin re-registration)" , func ( ctx context . Context ) {
2023-04-26 12:41:14 -04:00
podRECMD := fmt . Sprintf ( "devs=$(ls /tmp/ | egrep '^Dev-[0-9]+$') && echo stub devices: $devs && sleep %s" , sleepIntervalForever )
pod1 := e2epod . NewPodClient ( f ) . CreateSync ( ctx , makeBusyboxPod ( SampleDeviceResourceName , podRECMD ) )
deviceIDRE := "stub devices: (Dev-[0-9]+)"
devID1 , err := parseLog ( ctx , f , pod1 . Name , pod1 . Name , deviceIDRE )
framework . ExpectNoError ( err , "getting logs for pod %q" , pod1 . Name )
gomega . Expect ( devID1 ) . To ( gomega . Not ( gomega . Equal ( "" ) ) )
pod1 , err = e2epod . NewPodClient ( f ) . Get ( ctx , pod1 . Name , metav1 . GetOptions { } )
framework . ExpectNoError ( err )
ginkgo . By ( "stopping the kubelet" )
2024-11-05 09:09:04 -05:00
restartKubelet := mustStopKubelet ( ctx , f )
2023-04-26 12:41:14 -04:00
ginkgo . By ( "stopping all the local containers - using CRI" )
rs , _ , err := getCRIClient ( )
framework . ExpectNoError ( err )
sandboxes , err := rs . ListPodSandbox ( ctx , & runtimeapi . PodSandboxFilter { } )
framework . ExpectNoError ( err )
for _ , sandbox := range sandboxes {
gomega . Expect ( sandbox . Metadata ) . ToNot ( gomega . BeNil ( ) )
ginkgo . By ( fmt . Sprintf ( "deleting pod using CRI: %s/%s -> %s" , sandbox . Metadata . Namespace , sandbox . Metadata . Name , sandbox . Id ) )
err := rs . RemovePodSandbox ( ctx , sandbox . Id )
framework . ExpectNoError ( err )
}
ginkgo . By ( "restarting the kubelet" )
2024-11-05 09:09:04 -05:00
restartKubelet ( ctx )
2023-04-26 12:41:14 -04:00
ginkgo . By ( "Wait for node to be ready again" )
e2enode . WaitForAllNodesSchedulable ( ctx , f . ClientSet , 5 * time . Minute )
ginkgo . By ( "Waiting for the pod to fail with admission error as device plugin hasn't re-registered yet" )
gomega . Eventually ( ctx , getPod ) .
WithArguments ( f , pod1 . Name ) .
WithTimeout ( time . Minute ) .
Should ( HaveFailedWithAdmissionError ( ) ,
"the pod succeeded to start, when it should fail with the admission error" )
// crosscheck that the device assignment is preserved and stable from the perspective of the kubelet.
// note we don't check again the logs of the container: the check is done at startup, the container
// never restarted (runs "forever" from this test timescale perspective) hence re-doing this check
// is useless.
ginkgo . By ( "Verifying the device assignment after kubelet restart using podresources API" )
gomega . Eventually ( ctx , func ( ) error {
v1PodResources , err = getV1NodeDevices ( ctx )
return err
} , 30 * time . Second , framework . Poll ) . ShouldNot ( gomega . HaveOccurred ( ) , "cannot fetch the compute resource assignment after kubelet restart" )
e2e: node: fix misleading device plugin test
We have a e2e test which tries to ensure device plugin assignments to pods are kept
across node reboots. And this tests is permafailing since many weeks at
time of writing (xref: #128443).
Problem is: closer inspection reveals the test was well intentioned, but
puzzling:
The test runs a pod, then restarts the kubelet, then _expects the pod to
end up in admission failure_ and yet _ensure the device assignment is
kept_! https://github.com/kubernetes/kubernetes/blob/v1.32.0-rc.0/test/e2e_node/device_plugin_test.go#L97
A reader can legitimately wonder if this means the device will be kept busy forever?
This is not the case, luckily. The test however embodied the behavior at
time of the kubelet, in turn caused by #103979
Device manager used to record the last admitted pod and forcibly added
to the list of active pod. The retention logic had space for exactly one
pod, the last which attempted admission.
This retention prevented the cleanup code
(see: https://github.com/kubernetes/kubernetes/blob/v1.32.0-rc.0/pkg/kubelet/cm/devicemanager/manager.go#L549
compare to: https://github.com/kubernetes/kubernetes/blob/v1.31.0-rc.0/pkg/kubelet/cm/devicemanager/manager.go#L549)
to clear the registration, so the device was still (mis)reported
allocated to the failed pod.
This fact was in turn leveraged by the test in question:
the test uses the podresources API to learn about the device assignment,
and because of the chain of events above the pod failed admission yet
was still reported as owning the device.
What happened however was the next pod trying admission would have
replaced the previous pod in the device manager data, so the previous
pod was no longer forced to be added into the active list, so its
assignment were correctly cleared once the cleanup code runs;
And the cleanup code is run, among other things, every time device
manager is asked to allocate devices and every time podresources API
queries the device assignment
Later, in PR https://github.com/kubernetes/kubernetes/pull/120661
the forced retention logic was removed from all the resource managers,
thus also from device manager, and this is what caused the permafailure.
Because all of the above, it should be evident that the e2e test was
actually enforcing a very specific and not really work-as-intended
behavior, which was also overall quite puzzling for users.
The best we can do is to fix the test to record and ensure that
pods which did fail admission _do not_ retain device assignment.
Unfortunately, we _cannot_ guarantee the desirable property that
pod going running retain their device assignment across node reboots.
In the kubelet restart flow, all pods race to be admitted. There's no
order enforced between device plugin pods and application pods.
Unless an application pod is lucky enough to _lose_ the race with both
the device plugin (to go running before the app pod does) and _also_
with the kubelet (which needs to set devices healthy before the pod
tries admission).
Signed-off-by: Francesco Romani <fromani@redhat.com>
2024-11-27 11:20:16 -05:00
// if we got this far, podresources API will now report 2 entries:
// - sample device plugin pod, running and doing fine
// - our test pod, in failed state. Pods in terminal state will still be reported, see https://github.com/kubernetes/kubernetes/issues/119423
// so we care about our test pod, and it will be present in the returned list till 119423 is fixed, but since it failed admission it must not have
// any device allocated to it, hence we check for empty device set in the podresources response. So, we check that
// A. our test pod must be present in the list response *and*
// B. it has no devices assigned to it.
// anything else is unexpected and thus makes the test fail. Once 119423 is fixed, a better, simpler and more intuitive check will be for the
// test pod to not be present in the podresources list response, but till that time we're stuck with this approach.
_ , found := checkPodResourcesAssignment ( v1PodResources , pod1 . Namespace , pod1 . Name , pod1 . Spec . Containers [ 0 ] . Name , SampleDeviceResourceName , [ ] string { } )
gomega . Expect ( found ) . To ( gomega . BeTrueBecause ( "%s/%s/%s failed admission, should not have devices registered" , pod1 . Namespace , pod1 . Name , pod1 . Spec . Containers [ 0 ] . Name ) )
2023-04-26 12:41:14 -04:00
} )
} )
}
2018-01-11 01:41:45 -05:00
// makeBusyboxPod returns a simple Pod spec with a busybox container
2023-02-21 07:50:33 -05:00
// that requests SampleDeviceResourceName and runs the specified command.
func makeBusyboxPod ( SampleDeviceResourceName , cmd string ) * v1 . Pod {
2017-12-29 05:43:38 -05:00
podName := "device-plugin-test-" + string ( uuid . NewUUID ( ) )
2023-02-21 07:50:33 -05:00
rl := v1 . ResourceList { v1 . ResourceName ( SampleDeviceResourceName ) : * resource . NewQuantity ( 1 , resource . DecimalSI ) }
2017-12-29 05:43:38 -05:00
return & v1 . Pod {
ObjectMeta : metav1 . ObjectMeta { Name : podName } ,
Spec : v1 . PodSpec {
RestartPolicy : v1 . RestartPolicyAlways ,
Containers : [ ] v1 . Container { {
Image : busyboxImage ,
Name : podName ,
// Runs the specified command in the test pod.
Command : [ ] string { "sh" , "-c" , cmd } ,
Resources : v1 . ResourceRequirements {
Limits : rl ,
Requests : rl ,
} ,
} } ,
} ,
}
}
2018-04-25 03:44:27 -04:00
// ensurePodContainerRestart confirms that pod container has restarted at least once
2022-12-12 04:11:10 -05:00
func ensurePodContainerRestart ( ctx context . Context , f * framework . Framework , podName string , contName string ) {
2018-04-25 03:44:27 -04:00
var initialCount int32
var currentCount int32
2022-12-12 04:11:10 -05:00
p , err := e2epod . NewPodClient ( f ) . Get ( ctx , podName , metav1 . GetOptions { } )
2018-04-25 03:44:27 -04:00
if err != nil || len ( p . Status . ContainerStatuses ) < 1 {
2019-08-27 05:18:43 -04:00
framework . Failf ( "ensurePodContainerRestart failed for pod %q: %v" , podName , err )
2018-04-25 03:44:27 -04:00
}
initialCount = p . Status . ContainerStatuses [ 0 ] . RestartCount
2024-07-31 11:58:15 -04:00
gomega . Eventually ( ctx , func ( ) int {
2022-12-12 04:11:10 -05:00
p , err = e2epod . NewPodClient ( f ) . Get ( ctx , podName , metav1 . GetOptions { } )
2017-12-29 05:43:38 -05:00
if err != nil || len ( p . Status . ContainerStatuses ) < 1 {
2024-07-31 11:58:15 -04:00
return 0
2017-12-29 05:43:38 -05:00
}
2018-04-25 03:44:27 -04:00
currentCount = p . Status . ContainerStatuses [ 0 ] . RestartCount
2019-08-27 05:18:43 -04:00
framework . Logf ( "initial %v, current %v" , initialCount , currentCount )
2024-07-31 11:58:15 -04:00
return int ( currentCount )
} , 5 * time . Minute , framework . Poll ) . Should ( gomega . BeNumerically ( ">" , initialCount ) )
2018-04-25 03:44:27 -04:00
}
2018-01-11 01:41:45 -05:00
2018-04-25 03:44:27 -04:00
// parseLog returns the matching string for the specified regular expression parsed from the container logs.
2023-03-13 00:54:11 -04:00
func parseLog ( ctx context . Context , f * framework . Framework , podName string , contName string , re string ) ( string , error ) {
2022-12-12 04:11:10 -05:00
logs , err := e2epod . GetPodLogs ( ctx , f . ClientSet , f . Namespace . Name , podName , contName )
2017-12-29 05:43:38 -05:00
if err != nil {
2023-03-13 00:54:11 -04:00
return "" , err
2017-12-29 05:43:38 -05:00
}
2018-01-11 01:41:45 -05:00
2019-08-27 05:18:43 -04:00
framework . Logf ( "got pod logs: %v" , logs )
2017-12-29 05:43:38 -05:00
regex := regexp . MustCompile ( re )
matches := regex . FindStringSubmatch ( logs )
if len ( matches ) < 2 {
2023-03-13 00:54:11 -04:00
return "" , fmt . Errorf ( "unexpected match in logs: %q" , logs )
2017-12-29 05:43:38 -05:00
}
2018-01-11 01:41:45 -05:00
2023-03-13 00:54:11 -04:00
return matches [ 1 ] , nil
2017-12-29 05:43:38 -05:00
}
2023-03-09 13:20:46 -05:00
2023-06-21 13:20:58 -04:00
func checkPodResourcesAssignment ( v1PodRes * kubeletpodresourcesv1 . ListPodResourcesResponse , podNamespace , podName , containerName , resourceName string , devs [ ] string ) ( error , bool ) {
2023-03-09 13:20:46 -05:00
for _ , podRes := range v1PodRes . PodResources {
if podRes . Namespace != podNamespace || podRes . Name != podName {
continue
}
for _ , contRes := range podRes . Containers {
if contRes . Name != containerName {
continue
}
return matchContainerDevices ( podNamespace + "/" + podName + "/" + containerName , contRes . Devices , resourceName , devs )
}
}
2024-10-26 11:56:54 -04:00
v1PodResStr := ""
for _ , p := range v1PodRes . PodResources {
for _ , c := range p . Containers {
v1PodResStr += fmt . Sprintf ( "%s/%s/%s," , p . Namespace , p . Name , c . Name )
}
}
err := fmt . Errorf ( "no resources found for %s/%s/%s in listpodresources [%s]" , podNamespace , podName , containerName , v1PodResStr )
2023-06-21 13:20:58 -04:00
framework . Logf ( "%v" , err )
return err , false
2023-03-09 13:20:46 -05:00
}
2023-06-21 13:20:58 -04:00
func matchContainerDevices ( ident string , contDevs [ ] * kubeletpodresourcesv1 . ContainerDevices , resourceName string , devs [ ] string ) ( error , bool ) {
2023-03-09 13:20:46 -05:00
expected := sets . New [ string ] ( devs ... )
assigned := sets . New [ string ] ( )
for _ , contDev := range contDevs {
if contDev . ResourceName != resourceName {
continue
}
assigned = assigned . Insert ( contDev . DeviceIds ... )
}
expectedStr := strings . Join ( expected . UnsortedList ( ) , "," )
assignedStr := strings . Join ( assigned . UnsortedList ( ) , "," )
framework . Logf ( "%s: devices expected %q assigned %q" , ident , expectedStr , assignedStr )
if ! assigned . Equal ( expected ) {
2023-06-21 13:20:58 -04:00
return fmt . Errorf ( "device allocation mismatch for %s expected %s assigned %s" , ident , expectedStr , assignedStr ) , true
2023-03-09 13:20:46 -05:00
}
2023-06-21 13:20:58 -04:00
return nil , true
2023-03-09 13:20:46 -05:00
}
2023-05-02 16:06:02 -04:00
// getSampleDevicePluginPod returns the Sample Device Plugin pod to be used e2e tests.
func getSampleDevicePluginPod ( pluginSockDir string ) * v1 . Pod {
data , err := e2etestfiles . Read ( SampleDevicePluginDSYAML )
if err != nil {
framework . Fail ( err . Error ( ) )
}
ds := readDaemonSetV1OrDie ( data )
dp := & v1 . Pod {
ObjectMeta : metav1 . ObjectMeta {
Name : SampleDevicePluginName ,
} ,
Spec : ds . Spec . Template . Spec ,
}
for i := range dp . Spec . Containers [ 0 ] . Env {
if dp . Spec . Containers [ 0 ] . Env [ i ] . Name == SampleDeviceEnvVarNamePluginSockDir {
dp . Spec . Containers [ 0 ] . Env [ i ] . Value = pluginSockDir
}
}
2025-06-03 22:10:57 -04:00
dp . Spec . Containers [ 0 ] . Env = append ( dp . Spec . Containers [ 0 ] . Env , v1 . EnvVar { Name : "CDI_ENABLED" , Value : "1" } )
2023-10-23 06:47:30 -04:00
2023-05-02 16:06:02 -04:00
return dp
}
2023-06-15 11:41:42 -04:00
// BeTheSamePodStillRunning matches if the pod has the same UID as expected,
// is in the Running phase, and is reported Ready.
func BeTheSamePodStillRunning(expected *v1.Pod) types.GomegaMatcher {
	return gomega.And(
		BeTheSamePodAs(expected.UID),
		BeAPodInPhase(v1.PodRunning),
		BeAPodReady(),
	)
}
// BeAPodReady matches if the pod is reported ready.
func BeAPodReady() types.GomegaMatcher {
	return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
		return podutils.IsPodReady(actual), nil
	}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} UID {{.Actual.UID}} not ready yet")
}
// BeAPodInPhase matches if the pod is in the given phase.
func BeAPodInPhase(phase v1.PodPhase) types.GomegaMatcher {
	return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
		return actual.Status.Phase == phase, nil
	}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} failed {{.To}} be in phase {{.Data}} instead is in phase {{.Actual.Status.Phase}}").WithTemplateData(phase)
}
// BeTheSamePodAs matches if the pod has the given UID.
func BeTheSamePodAs(podUID k8stypes.UID) types.GomegaMatcher {
	return gcustom.MakeMatcher(func(actual *v1.Pod) (bool, error) {
		return actual.UID == podUID, nil
	}).WithTemplate("Pod {{.Actual.Namespace}}/{{.Actual.Name}} expected UID {{.Data}} has UID instead {{.Actual.UID}}").WithTemplateData(podUID)
}