Merge pull request #135732 from pohly/dra-upgrade-downgrade-device-taints

DRA: upgrade/downgrade device taints
Authored by Kubernetes Prow Robot on 2026-02-17 22:13:42 +05:30; committed by GitHub
commit c9020bef5a
7 changed files with 208 additions and 72 deletions

View file

@ -272,7 +272,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
ginkgo.It("must not run a pod if a claim is not ready", func(ctx context.Context) {
claim := b.ExternalClaim()
b.Create(f.TContext(ctx), claim)
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
// This bypasses scheduling and therefore the pod gets
// to run on the node although the claim is not ready.
@ -295,7 +295,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
ginkgo.It("must unprepare resources for force-deleted pod", func(ctx context.Context) {
claim := b.ExternalClaim()
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
zero := int64(0)
pod.Spec.TerminationGracePeriodSeconds = &zero
@ -361,7 +361,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
claimForContainer1.Spec.Devices.Config[1].Opaque.Parameters.Raw = []byte(`{"container1_config1":"true"}`)
claimForContainer1.Spec.Devices.Config[2].Opaque.Parameters.Raw = []byte(`{"container1_config2":"true"}`)
pod := b.PodExternal()
pod := b.PodExternal("")
pod.Spec.ResourceClaims = []v1.PodResourceClaim{
{
Name: "all",
@ -445,7 +445,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
// that the race goes bad (old pod pending shutdown when
// new one arrives) and always schedules to the same node.
claim := b.ExternalClaim()
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
node := nodes.NodeNames[0]
pod.Spec.NodeSelector = map[string]string{"kubernetes.io/hostname": node}
oldClaim := b.Create(f.TContext(ctx), claim, pod)[0].(*resourceapi.ResourceClaim)
@ -616,8 +616,8 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
})
ginkgo.It("supports init containers with external claims", func(ctx context.Context) {
pod := b.PodExternal()
claim := b.ExternalClaim()
pod := b.PodExternal(claim.Name)
pod.Spec.InitContainers = []v1.Container{pod.Spec.Containers[0]}
pod.Spec.InitContainers[0].Name += "-init"
// This must succeed for the pod to start.
@ -628,8 +628,8 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
})
ginkgo.It("removes reservation from claim when pod is done", func(ctx context.Context) {
pod := b.PodExternal()
claim := b.ExternalClaim()
pod := b.PodExternal(claim.Name)
pod.Spec.Containers[0].Command = []string{"true"}
b.Create(f.TContext(ctx), claim, pod)
@ -680,7 +680,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
b := drautils.NewBuilderNow(tCtx, driver)
claim := b.ExternalClaim()
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod)
// Cannot run pod, no devices.
@ -725,7 +725,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
// Build behaves the same for both driver instances.
b := drautils.NewBuilderNow(tCtx, oldDriver)
claim := b.ExternalClaim()
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod)
b.TestPod(tCtx, pod)
@ -768,7 +768,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
// Build behaves the same for both driver instances.
b := drautils.NewBuilderNow(tCtx, oldDriver)
claim := b.ExternalClaim()
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod)
b.TestPod(tCtx, pod)
@ -814,7 +814,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
// Build behaves the same for both driver instances.
b := drautils.NewBuilderNow(tCtx, oldDriver)
claim := b.ExternalClaim()
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod)
b.TestPod(tCtx, pod)
@ -901,10 +901,11 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
ginkgo.It("supports sharing a claim concurrently", func(ctx context.Context) {
tCtx := f.TContext(ctx)
var objects []klog.KMetadata
objects = append(objects, b.ExternalClaim())
claim := b.ExternalClaim()
objects = append(objects, claim)
pods := make([]*v1.Pod, numPods)
for i := 0; i < numPods; i++ {
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
pods[i] = pod
objects = append(objects, pod)
}
@ -1015,18 +1016,18 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
ginkgo.It("supports simple pod referencing external resource claim", func(ctx context.Context) {
tCtx := f.TContext(ctx)
pod := b.PodExternal()
claim := b.ExternalClaim()
pod := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod)
b.TestPod(tCtx, pod)
})
ginkgo.It("supports external claim referenced by multiple pods", func(ctx context.Context) {
tCtx := f.TContext(ctx)
pod1 := b.PodExternal()
pod2 := b.PodExternal()
pod3 := b.PodExternal()
claim := b.ExternalClaim()
pod1 := b.PodExternal(claim.Name)
pod2 := b.PodExternal(claim.Name)
pod3 := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod1, pod2, pod3)
for _, pod := range []*v1.Pod{pod1, pod2, pod3} {
@ -1036,10 +1037,10 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
ginkgo.It("supports external claim referenced by multiple containers of multiple pods", func(ctx context.Context) {
tCtx := f.TContext(ctx)
pod1 := b.PodExternalMultiple()
pod2 := b.PodExternalMultiple()
pod3 := b.PodExternalMultiple()
claim := b.ExternalClaim()
pod1 := b.PodExternalMultiple(claim.Name)
pod2 := b.PodExternalMultiple(claim.Name)
pod3 := b.PodExternalMultiple(claim.Name)
b.Create(tCtx, claim, pod1, pod2, pod3)
for _, pod := range []*v1.Pod{pod1, pod2, pod3} {
@ -1061,8 +1062,8 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
ginkgo.It("must deallocate after use", func(ctx context.Context) {
tCtx := f.TContext(ctx)
pod := b.PodExternal()
claim := b.ExternalClaim()
pod := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod)
gomega.Eventually(ctx, func(ctx context.Context) (*resourceapi.ResourceClaim, error) {
@ -1086,11 +1087,11 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
tCtx := f.TContext(ctx)
extendedResourceName := deployDevicePlugin(tCtx, f, nodes.NodeNames[0:1], false)
pod := b.PodExternal()
claim := b.ExternalClaim()
pod := b.PodExternal(claim.Name)
resources := v1.ResourceList{extendedResourceName: resource.MustParse("1")}
pod.Spec.Containers[0].Resources.Requests = resources
pod.Spec.Containers[0].Resources.Limits = resources
claim := b.ExternalClaim()
b.Create(tCtx, claim, pod)
b.TestPod(tCtx, pod)
})
@ -1117,13 +1118,12 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
b.Create(tCtx, firstClaim, secondClaim)
// First pod uses only firstClaim
firstPod := b.PodExternal()
firstPod := b.PodExternal(firstClaim.Name)
b.Create(tCtx, firstPod)
b.TestPod(tCtx, firstPod)
// Second pod uses firstClaim (already prepared) + secondClaim (new)
secondPod := b.PodExternal()
secondPod := b.PodExternal("")
secondPod.Spec.ResourceClaims = []v1.PodResourceClaim{
{
Name: "first",
@ -1362,7 +1362,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
},
},
}
pod := b1.PodExternal()
pod := b1.PodExternal("")
podClaimName := "resource-claim"
externalClaimName := "external-multiclaim"
pod.Spec.ResourceClaims = []v1.PodResourceClaim{
@ -1453,7 +1453,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
},
},
}
pod := b1.PodExternal()
pod := b1.PodExternal("")
podClaimName := "resource-claim"
externalClaimName := "external-multiclaim"
pod.Spec.ResourceClaims = []v1.PodResourceClaim{
@ -1532,7 +1532,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
},
},
}
pod := b1.PodExternal()
pod := b1.PodExternal("")
podClaimName := "resource-claim"
externalClaimName := "external-multiclaim"
pod.Spec.ResourceClaims = []v1.PodResourceClaim{
@ -1750,7 +1750,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
},
},
}
pod := b.PodExternal()
pod := b.PodExternal("")
podClaimName := "resource-claim"
pod.Spec.ResourceClaims = []v1.PodResourceClaim{
{
@ -1831,7 +1831,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
},
},
}
pod := b.PodExternal()
pod := b.PodExternal("")
podClaimName := "resource-claim"
pod.Spec.ResourceClaims = []v1.PodResourceClaim{
{
@ -1904,7 +1904,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
// available, there should be sufficient counters left to allocate
// a device.
claim := b.ExternalClaim()
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
pod.Spec.ResourceClaims[0].ResourceClaimName = &claim.Name
b.Create(tCtx, claim, pod)
b.TestPod(tCtx, pod)
@ -1912,8 +1912,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
// For the second pod, there should not be sufficient counters left, so
// it should not succeed. This means the pod should remain in the pending state.
claim2 := b.ExternalClaim()
pod2 := b.PodExternal()
pod2.Spec.ResourceClaims[0].ResourceClaimName = &claim2.Name
pod2 := b.PodExternal(claim2.Name)
b.Create(tCtx, claim2, pod2)
gomega.Consistently(ctx, func(ctx context.Context) error {
@ -1969,8 +1968,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
"memory": resource.MustParse("4Gi"),
},
}
pod := b.PodExternal()
pod.Spec.ResourceClaims[0].ResourceClaimName = &claim.Name
pod := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod)
b.TestPod(tCtx, pod)
@ -1981,8 +1979,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
"memory": resource.MustParse("8Gi"),
},
}
pod2 := b.PodExternal()
pod2.Spec.ResourceClaims[0].ResourceClaimName = &claim2.Name
pod2 := b.PodExternal(claim2.Name)
b.Create(tCtx, claim2, pod2)
// The third pod should be able to use the remaining 4Gi of the device.
@ -1992,8 +1989,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
"memory": resource.MustParse("4Gi"),
},
}
pod3 := b.PodExternal()
pod3.Spec.ResourceClaims[0].ResourceClaimName = &claim3.Name
pod3 := b.PodExternal(claim3.Name)
b.Create(tCtx, claim3, pod3)
b.TestPod(tCtx, pod3)
@ -2298,7 +2294,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
drautils.TestContainerEnv(tCtx, pod, pod.Spec.Containers[0].Name, false, containerEnv...)
claim := b.ExternalClaim()
pod2 := b.PodExternal()
pod2 := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod2)
b.TestPod(tCtx, pod2)
@ -2832,8 +2828,8 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
f.It("must be possible for the driver to update the ResourceClaim.Status.Devices once allocated", f.WithFeatureGate(features.DRAResourceClaimDeviceStatus), func(ctx context.Context) {
tCtx := f.TContext(ctx)
pod := b.PodExternal()
claim := b.ExternalClaim()
pod := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod)
// Waits for the ResourceClaim to be allocated and the pod to be scheduled.
@ -3073,7 +3069,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
claim1b := b1.ExternalClaim()
claim2 := b2.ExternalClaim()
claim2b := b2.ExternalClaim()
pod := b1.PodExternal()
pod := b1.PodExternal(claim1.Name)
for i, claim := range []*resourceapi.ResourceClaim{claim1b, claim2, claim2b} {
pod.Spec.ResourceClaims = append(pod.Spec.ResourceClaims,
v1.PodResourceClaim{

View file

@ -60,7 +60,7 @@ func (b *Builder) ExtendedResourceName(i int) string {
case SingletonIndex:
return e2enode.SampleDeviceResourceName
default:
return b.driver.Name + "/resource" + fmt.Sprintf("-%d", i)
return b.Driver.Name + "/resource" + fmt.Sprintf("-%d", i)
}
}
@ -68,7 +68,7 @@ func (b *Builder) ExtendedResourceName(i int) string {
// namespace.
type Builder struct {
namespace string
driver *Driver
Driver *Driver
UseExtendedResourceName bool
podCounter int
@ -79,7 +79,7 @@ type Builder struct {
// ClassName returns the default device class name.
func (b *Builder) ClassName() string {
return b.namespace + b.driver.NameSuffix + "-class"
return b.namespace + b.Driver.NameSuffix + "-class"
}
// SingletonIndex causes Builder.Class and ExtendedResourceName to create a
@ -115,14 +115,14 @@ func (b *Builder) Class(i int) *resourceapi.DeviceClass {
}
class.Spec.Selectors = []resourceapi.DeviceSelector{{
CEL: &resourceapi.CELDeviceSelector{
Expression: fmt.Sprintf(`device.driver == "%s"`, b.driver.Name),
Expression: fmt.Sprintf(`device.driver == "%s"`, b.Driver.Name),
},
}}
if b.ClassParameters != "" {
class.Spec.Config = []resourceapi.DeviceClassConfiguration{{
DeviceConfiguration: resourceapi.DeviceConfiguration{
Opaque: &resourceapi.OpaqueDeviceConfiguration{
Driver: b.driver.Name,
Driver: b.Driver.Name,
Parameters: runtime.RawExtension{Raw: []byte(b.ClassParameters)},
},
},
@ -135,7 +135,7 @@ func (b *Builder) Class(i int) *resourceapi.DeviceClass {
// that test pods can reference
func (b *Builder) ExternalClaim() *resourceapi.ResourceClaim {
b.claimCounter++
name := "external-claim" + b.driver.NameSuffix // This is what podExternal expects.
name := "external-claim" + b.Driver.NameSuffix // This is what podExternal expects.
if b.claimCounter > 1 {
name += fmt.Sprintf("-%d", b.claimCounter)
}
@ -160,7 +160,7 @@ func (b *Builder) claimSpecWithV1beta1() resourcev1beta1.ResourceClaimSpec {
Config: []resourcev1beta1.DeviceClaimConfiguration{{
DeviceConfiguration: resourcev1beta1.DeviceConfiguration{
Opaque: &resourcev1beta1.OpaqueDeviceConfiguration{
Driver: b.driver.Name,
Driver: b.Driver.Name,
Parameters: runtime.RawExtension{
Raw: []byte(parameters),
},
@ -188,7 +188,7 @@ func (b *Builder) claimSpecWithV1beta2() resourcev1beta2.ResourceClaimSpec {
Config: []resourcev1beta2.DeviceClaimConfiguration{{
DeviceConfiguration: resourcev1beta2.DeviceConfiguration{
Opaque: &resourcev1beta2.OpaqueDeviceConfiguration{
Driver: b.driver.Name,
Driver: b.Driver.Name,
Parameters: runtime.RawExtension{
Raw: []byte(parameters),
},
@ -216,7 +216,7 @@ func (b *Builder) ClaimSpec() resourceapi.ResourceClaimSpec {
Config: []resourceapi.DeviceClaimConfiguration{{
DeviceConfiguration: resourceapi.DeviceConfiguration{
Opaque: &resourceapi.OpaqueDeviceConfiguration{
Driver: b.driver.Name,
Driver: b.Driver.Name,
Parameters: runtime.RawExtension{
Raw: []byte(parameters),
},
@ -249,7 +249,7 @@ func (b *Builder) Pod() *v1.Pod {
pod.Spec.RestartPolicy = v1.RestartPolicyNever
pod.GenerateName = ""
b.podCounter++
pod.Name = fmt.Sprintf("tester%s-%d", b.driver.NameSuffix, b.podCounter)
pod.Name = fmt.Sprintf("tester%s-%d", b.Driver.NameSuffix, b.podCounter)
return pod
}
@ -313,12 +313,11 @@ func (b *Builder) PodInlineMultiple() (*v1.Pod, *resourceapi.ResourceClaimTempla
return pod, template
}
// PodExternal adds a pod that references external resource claim with default class name and parameters.
func (b *Builder) PodExternal() *v1.Pod {
// PodExternal adds a pod that references the named resource claim.
func (b *Builder) PodExternal(externalClaimName string) *v1.Pod {
pod := b.Pod()
pod.Spec.Containers[0].Name = "with-resource"
podClaimName := "resource-claim"
externalClaimName := "external-claim" + b.driver.NameSuffix
pod.Spec.ResourceClaims = []v1.PodResourceClaim{
{
Name: podClaimName,
@ -329,9 +328,9 @@ func (b *Builder) PodExternal() *v1.Pod {
return pod
}
// podShared returns a pod with 3 containers that reference external resource claim with default class name and parameters.
func (b *Builder) PodExternalMultiple() *v1.Pod {
pod := b.PodExternal()
// PodExternalMultiple returns a pod with 3 containers that reference the named external resource claim.
func (b *Builder) PodExternalMultiple(externalClaimName string) *v1.Pod {
pod := b.PodExternal(externalClaimName)
pod.Spec.Containers = append(pod.Spec.Containers, *pod.Spec.Containers[0].DeepCopy(), *pod.Spec.Containers[0].DeepCopy())
pod.Spec.Containers[1].Name += "-1"
pod.Spec.Containers[2].Name += "-2"
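
The signature change above means callers now name the claim explicitly instead of relying on the builder's implicit "external-claim" naming. A minimal sketch of the two call patterns this PR uses, assuming a *drautils.Builder and a ktesting.TContext as in the tests above and the same imports (core v1, drautils, ktesting); the helper name and claim variables are illustrative:

func examplePodExternalUsage(tCtx ktesting.TContext, b *drautils.Builder) {
	// Common case: the pod's single "resource-claim" entry points at a claim
	// built by the same builder; PodExternal(claim.Name) presumably fills in
	// ResourceClaimName, so callers no longer set it by hand.
	claim := b.ExternalClaim()
	pod := b.PodExternal(claim.Name)
	b.Create(tCtx, claim, pod)
	b.TestPod(tCtx, pod)

	// Custom case: pass "" and replace pod.Spec.ResourceClaims entirely,
	// as the multi-claim specs above do.
	otherClaim := b.ExternalClaim()
	pod2 := b.PodExternal("")
	pod2.Spec.ResourceClaims = []v1.PodResourceClaim{{
		Name:              "resource-claim",
		ResourceClaimName: &otherClaim.Name,
	}}
	b.Create(tCtx, otherClaim, pod2)
	b.TestPod(tCtx, pod2)
}
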
@ -417,7 +416,7 @@ func (b *Builder) DeletePodAndWaitForNotFound(tCtx ktesting.TContext, pod *v1.Po
func (b *Builder) TestPod(tCtx ktesting.TContext, pod *v1.Pod, env ...string) {
tCtx.Helper()
if !b.driver.WithKubelet {
if !b.Driver.WithKubelet {
// Less testing when we cannot rely on the kubelet to actually run the pod.
err := e2epod.WaitForPodScheduled(tCtx, tCtx.Client(), pod.Namespace, pod.Name)
tCtx.ExpectNoError(err, "schedule pod")
@ -474,7 +473,7 @@ func TestContainerEnv(tCtx ktesting.TContext, pod *v1.Pod, containerName string,
}
func NewBuilder(f *framework.Framework, driver *Driver) *Builder {
b := &Builder{driver: driver}
b := &Builder{Driver: driver}
ginkgo.BeforeEach(func() {
b.setUp(f.TContext(context.Background()))
})
@ -482,7 +481,7 @@ func NewBuilder(f *framework.Framework, driver *Driver) *Builder {
}
func NewBuilderNow(tCtx ktesting.TContext, driver *Driver) *Builder {
b := &Builder{driver: driver}
b := &Builder{Driver: driver}
b.setUp(tCtx)
return b
}
@ -516,7 +515,7 @@ func (b *Builder) tearDown(tCtx ktesting.TContext) {
}
tCtx.Logf("Deleting %T %s", &pod, klog.KObj(&pod))
options := metav1.DeleteOptions{}
if !b.driver.WithRealNodes {
if !b.Driver.WithRealNodes {
// Force-delete, no kubelet.
options.GracePeriodSeconds = ptr.To(int64(0))
}
@ -542,7 +541,7 @@ func (b *Builder) tearDown(tCtx ktesting.TContext) {
}
}
for host, plugin := range b.driver.Nodes {
for host, plugin := range b.Driver.Nodes {
tCtx.Logf("Waiting for resources on %s to be unprepared", host)
tCtx.Eventually(func(ktesting.TContext) []app.ClaimID { return plugin.GetPreparedResources() }).WithTimeout(time.Minute).Should(gomega.BeEmpty(), "prepared claims on host %s", host)
}

View file

@ -1038,6 +1038,7 @@ func (d *Driver) TearDown(tCtx ktesting.TContext) {
//
// Only use this in tests where kubelet support for DRA is guaranteed.
func (d *Driver) IsGone(tCtx ktesting.TContext) {
tCtx.Helper()
tCtx.Logf("Waiting for ResourceSlices of driver %s to be removed...", d.Name)
tCtx.Eventually(d.NewGetSlices()).WithTimeout(2 * time.Minute).Should(gomega.HaveField("Items", gomega.BeEmpty()))
}

View file

@ -31,7 +31,7 @@ import (
func coreDRA(tCtx ktesting.TContext, b *drautils.Builder) upgradedTestFunc {
namespace := tCtx.Namespace()
claim := b.ExternalClaim()
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
b.Create(tCtx, claim, pod)
b.TestPod(tCtx, pod)
@ -43,7 +43,7 @@ func coreDRA(tCtx ktesting.TContext, b *drautils.Builder) upgradedTestFunc {
// Create another claim and pod, this time using the latest Kubernetes.
claim = b.ExternalClaim()
pod = b.PodExternal()
pod = b.PodExternal(claim.Name)
pod.Spec.ResourceClaims[0].ResourceClaimName = &claim.Name
b.Create(tCtx, claim, pod)
b.TestPod(tCtx, pod)

View file

@ -0,0 +1,136 @@
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package e2edra
import (
"time"
"github.com/onsi/gomega"
resourceapi "k8s.io/api/resource/v1"
resourcealpha "k8s.io/api/resource/v1alpha3"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
drautils "k8s.io/kubernetes/test/e2e/dra/utils"
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
"k8s.io/kubernetes/test/utils/ktesting"
"k8s.io/utils/ptr"
)
// deviceTaints checks that:
// - A pod which gets scheduled on the previous release because of a toleration is kept running after an upgrade.
// - A DeviceTaintRule created before a downgrade to evict the pod still prevents pod scheduling after the downgrade.
func deviceTaints(tCtx ktesting.TContext, b *drautils.Builder) upgradedTestFunc {
namespace := tCtx.Namespace()
taintKey := "devicetaints"
taintValueFromSlice := "from-slice"
taintValueFromRule := "from-rule"
taintedDevice := "tainted-device"
// We need additional devices which are only used by this test.
// We achieve that with cluster-scoped devices that start out with
// a taint.
slice := &resourceapi.ResourceSlice{
ObjectMeta: metav1.ObjectMeta{
Name: "devicetaints",
},
Spec: resourceapi.ResourceSliceSpec{
Driver: b.Driver.Name,
Pool: resourceapi.ResourcePool{
Name: "devicetaints",
ResourceSliceCount: 1,
},
AllNodes: ptr.To(true),
Devices: []resourceapi.Device{{
Name: taintedDevice,
Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
"example.com/type": {
StringValue: ptr.To("devicetaints"),
},
},
Taints: []resourceapi.DeviceTaint{{
Key: taintKey,
Value: taintValueFromSlice,
Effect: resourceapi.DeviceTaintEffectNoSchedule,
}},
}},
},
}
_, err := tCtx.Client().ResourceV1().ResourceSlices().Create(tCtx, slice, metav1.CreateOptions{})
tCtx.ExpectNoError(err)
tCtx.Log("The pod wants exactly the tainted device -> not schedulable.")
claim := b.ExternalClaim()
pod := b.PodExternal(claim.Name)
claim.Spec.Devices.Requests[0].Exactly.Selectors = []resourceapi.DeviceSelector{{
CEL: &resourceapi.CELDeviceSelector{
Expression: `device.attributes["example.com"].?type.orValue("") == "devicetaints"`,
},
}}
b.Create(tCtx, claim, pod)
tCtx.ExpectNoError(e2epod.WaitForPodNameUnschedulableInNamespace(tCtx, tCtx.Client(), pod.Name, namespace))
tCtx.Log("Adding a toleration makes the pod schedulable.")
claim.Spec.Devices.Requests[0].Exactly.Tolerations = []resourceapi.DeviceToleration{{
Key: taintKey,
Value: taintValueFromSlice,
Effect: resourceapi.DeviceTaintEffectNoSchedule,
}}
tCtx.ExpectNoError(tCtx.Client().ResourceV1().ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{}))
_, err = tCtx.Client().ResourceV1().ResourceClaims(namespace).Create(tCtx, claim, metav1.CreateOptions{})
tCtx.ExpectNoError(err)
b.TestPod(tCtx, pod)
return func(tCtx ktesting.TContext) downgradedTestFunc {
tCtx.Log("Pod running consistently after upgrade.")
tCtx.Consistently(func(tCtx ktesting.TContext) error {
return e2epod.WaitForPodRunningInNamespace(tCtx, tCtx.Client(), pod)
}).WithTimeout(30 * time.Second).WithPolling(5 * time.Second).Should(gomega.Succeed())
tCtx.Logf("Evict pod through DeviceTaintRule.")
rule := &resourcealpha.DeviceTaintRule{
ObjectMeta: metav1.ObjectMeta{
Name: "device-taint-rule",
},
Spec: resourcealpha.DeviceTaintRuleSpec{
DeviceSelector: &resourcealpha.DeviceTaintSelector{
Driver: &b.Driver.Name,
Pool: &slice.Spec.Pool.Name,
Device: &taintedDevice,
},
Taint: resourcealpha.DeviceTaint{
Key: taintKey,
Value: taintValueFromRule,
Effect: resourcealpha.DeviceTaintEffectNoExecute,
},
},
}
_, err := tCtx.Client().ResourceV1alpha3().DeviceTaintRules().Create(tCtx, rule, metav1.CreateOptions{})
tCtx.ExpectNoError(err)
tCtx.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(tCtx, tCtx.Client(), pod.Name, namespace, 5*time.Minute))
return func(tCtx ktesting.TContext) {
tCtx.Log("DeviceTaintRule still in effect.")
b.Create(tCtx, pod)
tCtx.ExpectNoError(e2epod.WaitForPodNameUnschedulableInNamespace(tCtx, tCtx.Client(), pod.Name, namespace))
// We must clean up manually, otherwise the code which checks for ResourceSlice deletion after
// driver removal gets stuck waiting for the removal of this special ResourceSlice.
// This cleanup cannot be scheduled via tCtx.Cleanup right after creating the slice, because
// then the slice would already be removed after the first sub-test.
tCtx.ExpectNoError(tCtx.Client().ResourceV1().ResourceSlices().Delete(tCtx, slice.Name, metav1.DeleteOptions{}))
}
}
}

View file

@ -66,7 +66,8 @@ func init() {
// sub-test. That function then returns the next piece of code, which then
// returns the final code. Each callback function is executed as a sub-test.
// The builder is configured to not delete objects when that sub-test ends,
// so objects persist until the entire test is done.
// so objects persist until the entire test is done. The same DRA driver
// is used for all sub-tests.
//
// Each sub-test must be self-contained. They intentionally run in a random
// order. However, they share the same cluster and the 8 devices which are
@ -74,6 +75,7 @@ func init() {
var subTests = map[string]initialTestFunc{
"core DRA": coreDRA,
"ResourceClaim device status": resourceClaimDeviceStatus,
"DeviceTaints": deviceTaints,
}
type initialTestFunc func(tCtx ktesting.TContext, builder *drautils.Builder) upgradedTestFunc
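
For readers following the control flow: every entry in subTests is a three-stage closure chain, one stage per cluster state (previous release, upgraded, downgraded), and all stages share the same builder and DRA driver. A skeleton of the shape that coreDRA and the new deviceTaints follow, assuming the upgradedTestFunc and downgradedTestFunc types match the return signatures above; the function name and body are illustrative:

func exampleSubTest(tCtx ktesting.TContext, b *drautils.Builder) upgradedTestFunc {
	// Stage 1: runs against the previous release. Create objects and check the baseline.
	claim := b.ExternalClaim()
	pod := b.PodExternal(claim.Name)
	b.Create(tCtx, claim, pod)
	b.TestPod(tCtx, pod)

	return func(tCtx ktesting.TContext) downgradedTestFunc {
		// Stage 2: runs on the upgraded cluster. Objects from stage 1 still exist
		// because the builder skips cleanup between sub-tests.
		claim2 := b.ExternalClaim()
		pod2 := b.PodExternal(claim2.Name)
		b.Create(tCtx, claim2, pod2)
		b.TestPod(tCtx, pod2)

		return func(tCtx ktesting.TContext) {
			// Stage 3: runs after the downgrade. Final checks go here, plus any
			// manual cleanup that would otherwise block teardown (compare the
			// ResourceSlice deletion at the end of deviceTaints).
		}
	}
}
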
@ -210,8 +212,8 @@ func testUpgradeDowngrade(tCtx ktesting.TContext) {
tCtx.Step(fmt.Sprintf("bring up v%d.%d", major, previousMinor), func(tCtx ktesting.TContext) {
cluster = localupcluster.New(tCtx)
localUpClusterEnv := map[string]string{
"RUNTIME_CONFIG": "resource.k8s.io/v1beta1,resource.k8s.io/v1beta2",
"FEATURE_GATES": "DynamicResourceAllocation=true",
"RUNTIME_CONFIG": "resource.k8s.io/v1beta1,resource.k8s.io/v1beta2,resource.k8s.io/v1alpha3",
"FEATURE_GATES": "DynamicResourceAllocation=true,DRADeviceTaintRules=true,DRADeviceTaints=true",
// *not* needed because driver will run in "local filesystem" mode (= driver.IsLocal): "ALLOW_PRIVILEGED": "1",
}
cluster.Start(tCtx, binDir, localUpClusterEnv)
@ -247,6 +249,7 @@ func testUpgradeDowngrade(tCtx ktesting.TContext) {
})
}
})
numSlices := len(driver.NewGetSlices()(tCtx).Items)
// We could split this up into first updating the apiserver, then control plane components, then restarting kubelet.
// For the purpose of this test we primarily care about full before/after comparisons, so this is not done yet.
@ -255,7 +258,7 @@ func testUpgradeDowngrade(tCtx ktesting.TContext) {
// The kubelet wipes all ResourceSlices on a restart because it doesn't know which drivers were running.
// Wait for the ResourceSlice controller in the driver to notice and recreate the ResourceSlices.
tCtx.WithStep("wait for ResourceSlices").Eventually(driver.NewGetSlices()).WithTimeout(5 * time.Minute).Should(gomega.HaveField("Items", gomega.HaveLen(len(nodes.NodeNames))))
tCtx.WithStep("wait for ResourceSlices").Eventually(driver.NewGetSlices()).WithTimeout(5 * time.Minute).Should(gomega.HaveField("Items", gomega.HaveLen(numSlices)))
downgradedTestFuncs := make(map[string]downgradedTestFunc, len(subTests))
tCtx.Run("after-cluster-upgrade", func(tCtx ktesting.TContext) {

View file

@ -57,7 +57,8 @@ func testShareResourceClaimSequentially(tCtx ktesting.TContext) {
b := drautils.NewBuilderNow(tCtx, driver)
var objects []klog.KMetadata
objects = append(objects, b.ExternalClaim())
claim := b.ExternalClaim()
objects = append(objects, claim)
// This test used to test usage of the claim by one pod
// at a time. After removing the "not sharable"
@ -67,7 +68,7 @@ func testShareResourceClaimSequentially(tCtx ktesting.TContext) {
tCtx.Logf("Creating %d pods sharing the same claim", numMaxPods)
pods := make([]*v1.Pod, numMaxPods)
for i := range numMaxPods {
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
pods[i] = pod
objects = append(objects, pod)
}
@ -130,7 +131,7 @@ func testShareResourceClaimSequentially(tCtx ktesting.TContext) {
morePods := make([]*v1.Pod, numMorePods)
objects = nil
for i := range numMorePods {
pod := b.PodExternal()
pod := b.PodExternal(claim.Name)
morePods[i] = pod
objects = append(objects, pod)
}