From 65ef31973cd621e51b4ebe0012c2eeaf369097c1 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Fri, 5 Dec 2025 08:20:07 +0100 Subject: [PATCH] DRA upgrade/downgrade: split out individual test steps This approach with collecting results from callbacks in a main ginkgo.It and using them as failures in separate ginkgo.It callbacks might be the best that can be done with Ginkgo. A better solution is probably Go unit tests with sub-tests. --- test/e2e_dra/coredra_test.go | 74 +++++ test/e2e_dra/resourceclaimstatus_test.go | 235 ++++++++++++++ test/e2e_dra/upgradedowngrade_test.go | 370 ++++++++--------------- 3 files changed, 430 insertions(+), 249 deletions(-) create mode 100644 test/e2e_dra/coredra_test.go create mode 100644 test/e2e_dra/resourceclaimstatus_test.go diff --git a/test/e2e_dra/coredra_test.go b/test/e2e_dra/coredra_test.go new file mode 100644 index 00000000000..c2024b28ed5 --- /dev/null +++ b/test/e2e_dra/coredra_test.go @@ -0,0 +1,74 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2edra + +import ( + "github.com/onsi/gomega" + v1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + drautils "k8s.io/kubernetes/test/e2e/dra/utils" + "k8s.io/kubernetes/test/e2e/framework" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" + "k8s.io/kubernetes/test/utils/ktesting" +) + +func coreDRA(tCtx ktesting.TContext, f *framework.Framework, b *drautils.Builder) step2Func { + namespace := f.Namespace.Name + claim := b.ExternalClaim() + pod := b.PodExternal() + b.Create(tCtx, claim, pod) + b.TestPod(tCtx, f, pod) + + return func(tCtx ktesting.TContext) step3Func { + // Remove pod prepared in step 1. + framework.ExpectNoError(f.ClientSet.ResourceV1beta1().ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{})) + framework.ExpectNoError(f.ClientSet.CoreV1().Pods(namespace).Delete(tCtx, pod.Name, metav1.DeleteOptions{})) + framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(tCtx, f.ClientSet, pod.Name, namespace, f.Timeouts.PodDelete)) + + // Create another claim and pod, this time using the latest Kubernetes. + claim = b.ExternalClaim() + pod = b.PodExternal() + pod.Spec.ResourceClaims[0].ResourceClaimName = &claim.Name + b.Create(tCtx, claim, pod) + b.TestPod(tCtx, f, pod) + + return func(tCtx ktesting.TContext) { + // We need to clean up explicitly because the normal + // cleanup doesn't work (driver shuts down first). + // + // The retry loops are necessary because of a stale connection + // to the restarted apiserver. Sometimes, attempts fail with "EOF" as error + // or (even weirder) with + // getting *v1.Pod: pods "tester-2" is forbidden: User "kubernetes-admin" cannot get resource "pods" in API group "" in the namespace "dra-9021" + ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error { + return f.ClientSet.ResourceV1beta1().ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{}) + }).Should(gomega.Succeed(), "delete claim after downgrade") + ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error { + return f.ClientSet.CoreV1().Pods(namespace).Delete(tCtx, pod.Name, metav1.DeleteOptions{}) + }).Should(gomega.Succeed(), "delete pod after downgrade") + ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) *v1.Pod { + pod, err := f.ClientSet.CoreV1().Pods(namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + tCtx.ExpectNoError(err, "get pod") + return pod + }).Should(gomega.BeNil(), "no pod after deletion after downgrade") + } + } +} diff --git a/test/e2e_dra/resourceclaimstatus_test.go b/test/e2e_dra/resourceclaimstatus_test.go new file mode 100644 index 00000000000..d4a65624628 --- /dev/null +++ b/test/e2e_dra/resourceclaimstatus_test.go @@ -0,0 +1,235 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2edra + +import ( + "bytes" + "encoding/json" + + "github.com/stretchr/testify/require" + + resourceapi "k8s.io/api/resource/v1" + resourceapiv1beta2 "k8s.io/api/resource/v1beta2" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + apiruntime "k8s.io/apimachinery/pkg/runtime" + resourceapiac "k8s.io/client-go/applyconfigurations/resource/v1" + resourceapiacv1beta2 "k8s.io/client-go/applyconfigurations/resource/v1beta2" + draapiv1beta2 "k8s.io/dynamic-resource-allocation/api/v1beta2" + drautils "k8s.io/kubernetes/test/e2e/dra/utils" + "k8s.io/kubernetes/test/e2e/framework" + "k8s.io/kubernetes/test/utils/ktesting" +) + +// resourceClaimDeviceStatus corresponds to testResourceClaimDeviceStatus in test/integration/dra +// and was copied from there, therefore the unit-test style with tCtx and require. +// This is the preferred style for new tests. +func resourceClaimDeviceStatus(tCtx ktesting.TContext, f *framework.Framework, b *drautils.Builder) step2Func { + namespace := f.Namespace.Name + claimName := "claim-with-device-status" + claim := &resourceapiv1beta2.ResourceClaim{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: namespace, + Name: claimName, + }, + Spec: resourceapiv1beta2.ResourceClaimSpec{ + Devices: resourceapiv1beta2.DeviceClaim{ + Requests: []resourceapiv1beta2.DeviceRequest{ + { + Name: "foo", + Exactly: &resourceapiv1beta2.ExactDeviceRequest{ + DeviceClassName: "foo", + }, + }, + }, + }, + }, + } + + claim, err := tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).Create(tCtx, claim, metav1.CreateOptions{}) + tCtx.ExpectNoError(err, "create ResourceClaim") + + // Add an allocation result. + // A finalizer is required for that. + finalizer := "test.example.com/my-test-finalizer" + claim.Finalizers = append(claim.Finalizers, finalizer) + claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).Update(tCtx, claim, metav1.UpdateOptions{}) + claim.Status.Allocation = &resourceapiv1beta2.AllocationResult{ + Devices: resourceapiv1beta2.DeviceAllocationResult{ + Results: []resourceapiv1beta2.DeviceRequestAllocationResult{ + { + Request: "foo", + Driver: "one", + Pool: "global", + Device: "my-device", + }, + { + Request: "foo", + Driver: "two", + Pool: "global", + Device: "another-device", + }, + { + Request: "foo", + Driver: "three", + Pool: "global", + Device: "my-device", + }, + }, + }, + } + tCtx.ExpectNoError(err, "add finalizer") + removeClaim := func(tCtx ktesting.TContext) { + client := tCtx.Client().ResourceV1beta2() + claim, err := client.ResourceClaims(namespace).Get(tCtx, claimName, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + return + } + tCtx.ExpectNoError(err, "get claim to remove finalizer") + if claim.Status.Allocation != nil { + claim.Status.Allocation = nil + claim, err = client.ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{}) + tCtx.ExpectNoError(err, "remove allocation") + } + claim.Finalizers = nil + claim, err = client.ResourceClaims(namespace).Update(tCtx, claim, metav1.UpdateOptions{}) + tCtx.ExpectNoError(err, "remove finalizer") + err = client.ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{}) + tCtx.ExpectNoError(err, "delete claim") + } + tCtx.CleanupCtx(removeClaim) + claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{}) + tCtx.ExpectNoError(err, "add allocation result") + + // Now adding the device status should work. + deviceStatus := []resourceapiv1beta2.AllocatedDeviceStatus{{ + Driver: "one", + Pool: "global", + Device: "my-device", + Data: &apiruntime.RawExtension{ + Raw: []byte(`{"kind": "foo", "apiVersion": "dra.example.com/v1"}`), + }, + NetworkData: &resourceapiv1beta2.NetworkDeviceData{ + InterfaceName: "net-1", + IPs: []string{ + "10.9.8.0/24", + "2001:db8::/64", + }, + HardwareAddress: "ea:9f:cb:40:b1:7b", + }, + }} + claim.Status.Devices = deviceStatus + tCtx.ExpectNoError(err, "add device status") + require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after adding device status") + + // Strip the RawExtension. SSA re-encodes it, which causes negligble differences that nonetheless break assert.Equal. + claim.Status.Devices[0].Data = nil + deviceStatus[0].Data = nil + claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{}) + tCtx.ExpectNoError(err, "add device status") + require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after stripping RawExtension") + + // Exercise SSA. + deviceStatusAC := resourceapiacv1beta2.AllocatedDeviceStatus(). + WithDriver("two"). + WithPool("global"). + WithDevice("another-device"). + WithNetworkData(resourceapiacv1beta2.NetworkDeviceData().WithInterfaceName("net-2")) + deviceStatus = append(deviceStatus, resourceapiv1beta2.AllocatedDeviceStatus{ + Driver: "two", + Pool: "global", + Device: "another-device", + NetworkData: &resourceapiv1beta2.NetworkDeviceData{ + InterfaceName: "net-2", + }, + }) + claimAC := resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace). + WithStatus(resourceapiacv1beta2.ResourceClaimStatus().WithDevices(deviceStatusAC)) + claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{ + Force: true, + FieldManager: "manager-1", + }) + tCtx.ExpectNoError(err, "apply device status two") + require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after applying device status two") + + deviceStatusAC = resourceapiacv1beta2.AllocatedDeviceStatus(). + WithDriver("three"). + WithPool("global"). + WithDevice("my-device"). + WithNetworkData(resourceapiacv1beta2.NetworkDeviceData().WithInterfaceName("net-3")) + deviceStatus = append(deviceStatus, resourceapiv1beta2.AllocatedDeviceStatus{ + Driver: "three", + Pool: "global", + Device: "my-device", + NetworkData: &resourceapiv1beta2.NetworkDeviceData{ + InterfaceName: "net-3", + }, + }) + claimAC = resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace). + WithStatus(resourceapiacv1beta2.ResourceClaimStatus().WithDevices(deviceStatusAC)) + claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{ + Force: true, + FieldManager: "manager-2", + }) + tCtx.ExpectNoError(err, "apply device status three") + require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after applying device status three") + var buffer bytes.Buffer + encoder := json.NewEncoder(&buffer) + encoder.SetIndent(" ", " ") + tCtx.ExpectNoError(encoder.Encode(claim)) + tCtx.Logf("Final ResourceClaim:\n%s", buffer.String()) + + return func(tCtx ktesting.TContext) step3Func { + // Update one entry, remove the other. + deviceStatusAC := resourceapiac.AllocatedDeviceStatus(). + WithDriver("two"). + WithPool("global"). + WithDevice("another-device"). + WithNetworkData(resourceapiac.NetworkDeviceData().WithInterfaceName("yet-another-net")) + deviceStatus[1].NetworkData.InterfaceName = "yet-another-net" + claimAC := resourceapiac.ResourceClaim(claim.Name, claim.Namespace). + WithStatus(resourceapiac.ResourceClaimStatus().WithDevices(deviceStatusAC)) + claim, err := tCtx.Client().ResourceV1().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{ + Force: true, + FieldManager: "manager-1", + }) + tCtx.ExpectNoError(err, "update device status two") + + var deviceStatusV1 []resourceapi.AllocatedDeviceStatus + for _, status := range deviceStatus { + var statusV1 resourceapi.AllocatedDeviceStatus + tCtx.ExpectNoError(draapiv1beta2.Convert_v1beta2_AllocatedDeviceStatus_To_v1_AllocatedDeviceStatus(&status, &statusV1, nil)) + deviceStatusV1 = append(deviceStatusV1, statusV1) + } + require.Equal(tCtx, deviceStatusV1, claim.Status.Devices, "after updating device status two") + + return func(tCtx ktesting.TContext) { + claimAC := resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace) + deviceStatus = deviceStatus[0:2] + claim, err := tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{ + Force: true, + FieldManager: "manager-2", + }) + tCtx.ExpectNoError(err, "remove device status three") + require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after removing device status three") + + // The cleanup order is so that we have to run this explicitly now. + // The tCtx.CleanupCtx is more for the sake of completeness. + removeClaim(tCtx) + } + } +} diff --git a/test/e2e_dra/upgradedowngrade_test.go b/test/e2e_dra/upgradedowngrade_test.go index 1be909178c1..08d09cb349d 100644 --- a/test/e2e_dra/upgradedowngrade_test.go +++ b/test/e2e_dra/upgradedowngrade_test.go @@ -18,11 +18,9 @@ package e2edra import ( "archive/tar" - "bytes" "compress/gzip" "context" _ "embed" - "encoding/json" "flag" "fmt" "io" @@ -38,24 +36,13 @@ import ( "github.com/onsi/ginkgo/v2" "github.com/onsi/gomega" - "github.com/stretchr/testify/require" - v1 "k8s.io/api/core/v1" - resourceapi "k8s.io/api/resource/v1" - resourceapiv1beta2 "k8s.io/api/resource/v1beta2" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - apiruntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/version" - resourceapiac "k8s.io/client-go/applyconfigurations/resource/v1" - resourceapiacv1beta2 "k8s.io/client-go/applyconfigurations/resource/v1beta2" restclient "k8s.io/client-go/rest" - draapiv1beta2 "k8s.io/dynamic-resource-allocation/api/v1beta2" "k8s.io/kubernetes/cmd/kubeadm/app/util/errors" drautils "k8s.io/kubernetes/test/e2e/dra/utils" "k8s.io/kubernetes/test/e2e/framework" e2enode "k8s.io/kubernetes/test/e2e/framework/node" - e2epod "k8s.io/kubernetes/test/e2e/framework/pod" e2etestfiles "k8s.io/kubernetes/test/e2e/framework/testfiles" "k8s.io/kubernetes/test/utils/ktesting" "k8s.io/kubernetes/test/utils/localupcluster" @@ -69,6 +56,45 @@ func init() { ktesting.SetDefaultVerbosity(2) } +// The overall flow of upgrade/downgrade testing is always the same: +// +// - Bring up a cluster with the previous release. +// - "Install" the test DRA driver with 8 devices for the one node in the cluster. +// There is a DeviceClass for it. +// - Step 1: run some test code. +// - Upgrade the cluster to the current code. +// - Step 2: run some more test code. +// - Downgrade to the previous release again. +// - Step 3: run some final test code. +// +// The "test code" gets registered here with a single function for each +// sub-test. That function then returns the next piece of code, which then +// returns the final code. +// +// For performance reasons there is only a single `It("works")` which +// runs everything. Failures in sub-tests are reported separately *if they +// are reported via the TContext*. Failures reported via ginkgo.Fail +// currently abort the entire test. This will be addressed by converting +// everything to ktesting-based unit tests. +// +// Each sub-test must be self-contained. They intentionally run in a random +// order. However, they share the same cluster and the 8 devices which are +// available there. +var subTests = map[string]step1Func{ + "core DRA": coreDRA, + "ResourceClaim device status": resourceClaimDeviceStatus, +} + +// step1Func is passed everything that is needed to run +type step1Func func(tCtx ktesting.TContext, f *framework.Framework, builder *drautils.Builder) step2Func + +type step2Func func(tCtx ktesting.TContext) step3Func + +type step3Func func(tCtx ktesting.TContext) + +// steps has the names for the actual ginkgo.It where sub-test results are provided. +var steps = []string{"before downgrade", "after downgrade", "after upgrade"} + var repoRoot = repoRootDefault() func currentBinDir() (envName, content string) { @@ -93,6 +119,7 @@ func repoRootDefault() string { func TestUpgradeDowngrade(t *testing.T) { suiteConfig, reporterConfig := framework.CreateGinkgoConfig() + suiteConfig.RandomizeAllSpecs = false ginkgo.RunSpecs(t, "DRA", suiteConfig, reporterConfig) } @@ -106,6 +133,14 @@ var _ = ginkgo.Describe("DRA upgrade/downgrade", func() { e2etestfiles.AddFileSource(e2etestfiles.RootFileSource{Root: repoRoot}) gomega.RegisterFailHandler(ginkgo.Fail) + // sub-test -> step -> failure + // + // Initially each failure string is empty. + results := make(map[string]map[string]string, len(subTests)) + for subTest := range subTests { + results[subTest] = make(map[string]string, len(steps)) + } + ginkgo.It("works", func(ctx context.Context) { // TODO: replace with helper code from https://github.com/kubernetes/kubernetes/pull/122481 should that get merged. tCtx := ktesting.Init(GinkgoContextTB()) @@ -232,30 +267,40 @@ var _ = ginkgo.Describe("DRA upgrade/downgrade", func() { tCtx.ExpectNoError(e2enode.WaitForAllNodesSchedulable(tCtx, tCtx.Client(), f.Timeouts.NodeSchedulable), "wait for all nodes to be schedulable") nodes := drautils.NewNodesNow(tCtx, f, 1, 1) - testResourceClaimDeviceStatusAfterUpgrade, testResourceClaimDeviceStatusAfterDowngrade := testResourceClaimDeviceStatus(tCtx, namespace.Name) // Opening sockets locally avoids intermittent errors and delays caused by proxying through the restarted apiserver. // We could speed up testing by shortening the sync delay in the ResourceSlice controller, but let's better // test the defaults. driver := drautils.NewDriverInstance(f) driver.IsLocal = true - driver.Run(nodes, drautils.DriverResourcesNow(nodes, 1)) + driver.Run(nodes, drautils.DriverResourcesNow(nodes, 8)) b := drautils.NewBuilderNow(ctx, f, driver) - claim := b.ExternalClaim() - pod := b.PodExternal() - b.Create(ctx, claim, pod) - b.TestPod(ctx, f, pod) - tCtx = ktesting.End(tCtx) + steps2 := make(map[string]step2Func, len(subTests)) + for subTest, step1 := range subTests { + tCtx = ktesting.Begin(tCtx, subTest) + var result error + func() { + tCtx, finalize := ktesting.WithError(tCtx, &result) + defer finalize() + + // This only gets set in case of success. + steps2[subTest] = step1(tCtx, f, b) + }() + if result != nil { + results[subTest][steps[0]] = result.Error() + } + tCtx = ktesting.End(tCtx) + } + tCtx = ktesting.Begin(tCtx, fmt.Sprintf("update to %s", gitVersion)) // We could split this up into first updating the apiserver, then control plane components, then restarting kubelet. // For the purpose of this test here we we primarily care about full before/after comparisons, so not done yet. // TODO restoreOptions := cluster.Modify(tCtx, localupcluster.ModifyOptions{Upgrade: true, BinDir: dir}) tCtx = ktesting.End(tCtx) - testResourceClaimDeviceStatusAfterUpgrade() // The kubelet wipes all ResourceSlices on a restart because it doesn't know which drivers were running. // Wait for the ResourceSlice controller in the driver to notice and recreate the ResourceSlices. @@ -263,17 +308,26 @@ var _ = ginkgo.Describe("DRA upgrade/downgrade", func() { gomega.Eventually(ctx, driver.NewGetSlices()).WithTimeout(5 * time.Minute).Should(gomega.HaveField("Items", gomega.HaveLen(len(nodes.NodeNames)))) tCtx = ktesting.End(tCtx) - // Remove pod prepared by previous Kubernetes. - framework.ExpectNoError(f.ClientSet.ResourceV1beta1().ResourceClaims(namespace.Name).Delete(ctx, claim.Name, metav1.DeleteOptions{})) - framework.ExpectNoError(f.ClientSet.CoreV1().Pods(namespace.Name).Delete(ctx, pod.Name, metav1.DeleteOptions{})) - framework.ExpectNoError(e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, namespace.Name, f.Timeouts.PodDelete)) + steps3 := make(map[string]step3Func, len(subTests)) + for subTest := range subTests { + step2 := steps2[subTest] + if step2 == nil { + continue + } + tCtx = ktesting.Begin(tCtx, subTest) + var result error + func() { + tCtx, finalize := ktesting.WithError(tCtx, &result) + defer finalize() - // Create another claim and pod, this time using the latest Kubernetes. - claim = b.ExternalClaim() - pod = b.PodExternal() - pod.Spec.ResourceClaims[0].ResourceClaimName = &claim.Name - b.Create(ctx, claim, pod) - b.TestPod(ctx, f, pod) + // This only gets set in case of success. + steps3[subTest] = step2(tCtx) + }() + if result != nil { + results[subTest][steps[0]] = result.Error() + } + tCtx = ktesting.End(tCtx) + } // Roll back. tCtx = ktesting.Begin(tCtx, "downgrade") @@ -289,30 +343,45 @@ var _ = ginkgo.Describe("DRA upgrade/downgrade", func() { return output }).Should(gomega.ContainSubstring(`"Caches are synced" controller="resource_claim"`)) tCtx = ktesting.End(tCtx) - testResourceClaimDeviceStatusAfterDowngrade() - // We need to clean up explicitly because the normal - // cleanup doesn't work (driver shuts down first). - // - // The retry loops are necessary because of a stale connection - // to the restarted apiserver. Sometimes, attempts fail with "EOF" as error - // or (even weirder) with - // getting *v1.Pod: pods "tester-2" is forbidden: User "kubernetes-admin" cannot get resource "pods" in API group "" in the namespace "dra-9021" - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error { - return f.ClientSet.ResourceV1beta1().ResourceClaims(namespace.Name).Delete(tCtx, claim.Name, metav1.DeleteOptions{}) - }).Should(gomega.Succeed(), "delete claim after downgrade") - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error { - return f.ClientSet.CoreV1().Pods(namespace.Name).Delete(tCtx, pod.Name, metav1.DeleteOptions{}) - }).Should(gomega.Succeed(), "delete pod after downgrade") - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) *v1.Pod { - pod, err := f.ClientSet.CoreV1().Pods(namespace.Name).Get(tCtx, pod.Name, metav1.GetOptions{}) - if apierrors.IsNotFound(err) { - return nil + for subTest := range subTests { + step3 := steps3[subTest] + if step3 == nil { + continue } - tCtx.ExpectNoError(err, "get pod") - return pod - }).Should(gomega.BeNil(), "no pod after deletion after downgrade") + tCtx = ktesting.Begin(tCtx, subTest) + var result error + func() { + tCtx, finalize := ktesting.WithError(tCtx, &result) + defer finalize() + + step3(tCtx) + }() + if result != nil { + results[subTest][steps[0]] = result.Error() + } + tCtx = ktesting.End(tCtx) + } }) + + // This runs last because by default Ginkgo does not randomize within + // a top-level container. + for subTest := range subTests { + ginkgo.Context(subTest, func() { + for _, step := range steps { + ginkgo.It(step, func() { + failure := results[subTest][step] + if failure != "" { + // Source code location will be useless here. We can't have both: + // separate test results *and* correct source code location. + // This will become better with testing.T-based unit tests. + _, _ = ginkgo.GinkgoWriter.Write([]byte("For log output see 'DRA upgrade/downgrade works'\n")) + ginkgo.Fail(failure) + } + }) + } + }) + } }) // sourceVersion identifies the Kubernetes git version based on hack/print-workspace-status.sh. @@ -432,200 +501,3 @@ func serverDownloadURL(tCtx ktesting.TContext, prefix string, major, minor uint) } return fmt.Sprintf("https://dl.k8s.io/release/%s/kubernetes-server-%s-%s.tar.gz", string(version), runtime.GOOS, runtime.GOARCH), string(version), nil } - -// testResourceClaimDeviceStatus corresponds to testResourceClaimDeviceStatus in test/integration/dra -// and was copied from there, therefore the unit-test style with tCtx and require. -func testResourceClaimDeviceStatus(tCtx ktesting.TContext, namespace string) (afterUpgrade, afterDowngrade func()) { - claimName := "claim-with-device-status" - claim := &resourceapiv1beta2.ResourceClaim{ - ObjectMeta: metav1.ObjectMeta{ - Namespace: namespace, - Name: claimName, - }, - Spec: resourceapiv1beta2.ResourceClaimSpec{ - Devices: resourceapiv1beta2.DeviceClaim{ - Requests: []resourceapiv1beta2.DeviceRequest{ - { - Name: "foo", - Exactly: &resourceapiv1beta2.ExactDeviceRequest{ - DeviceClassName: "foo", - }, - }, - }, - }, - }, - } - - claim, err := tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).Create(tCtx, claim, metav1.CreateOptions{}) - tCtx.ExpectNoError(err, "create ResourceClaim") - - // Add an allocation result. - // A finalizer is required for that. - finalizer := "test.example.com/my-test-finalizer" - claim.Finalizers = append(claim.Finalizers, finalizer) - claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).Update(tCtx, claim, metav1.UpdateOptions{}) - claim.Status.Allocation = &resourceapiv1beta2.AllocationResult{ - Devices: resourceapiv1beta2.DeviceAllocationResult{ - Results: []resourceapiv1beta2.DeviceRequestAllocationResult{ - { - Request: "foo", - Driver: "one", - Pool: "global", - Device: "my-device", - }, - { - Request: "foo", - Driver: "two", - Pool: "global", - Device: "another-device", - }, - { - Request: "foo", - Driver: "three", - Pool: "global", - Device: "my-device", - }, - }, - }, - } - tCtx.ExpectNoError(err, "add finalizer") - removeClaim := func(tCtx ktesting.TContext) { - client := tCtx.Client().ResourceV1beta2() - claim, err := client.ResourceClaims(namespace).Get(tCtx, claimName, metav1.GetOptions{}) - if apierrors.IsNotFound(err) { - return - } - tCtx.ExpectNoError(err, "get claim to remove finalizer") - if claim.Status.Allocation != nil { - claim.Status.Allocation = nil - claim, err = client.ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{}) - tCtx.ExpectNoError(err, "remove allocation") - } - claim.Finalizers = nil - claim, err = client.ResourceClaims(namespace).Update(tCtx, claim, metav1.UpdateOptions{}) - tCtx.ExpectNoError(err, "remove finalizer") - err = client.ResourceClaims(namespace).Delete(tCtx, claim.Name, metav1.DeleteOptions{}) - tCtx.ExpectNoError(err, "delete claim") - } - tCtx.CleanupCtx(removeClaim) - claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{}) - tCtx.ExpectNoError(err, "add allocation result") - - // Now adding the device status should work. - deviceStatus := []resourceapiv1beta2.AllocatedDeviceStatus{{ - Driver: "one", - Pool: "global", - Device: "my-device", - Data: &apiruntime.RawExtension{ - Raw: []byte(`{"kind": "foo", "apiVersion": "dra.example.com/v1"}`), - }, - NetworkData: &resourceapiv1beta2.NetworkDeviceData{ - InterfaceName: "net-1", - IPs: []string{ - "10.9.8.0/24", - "2001:db8::/64", - }, - HardwareAddress: "ea:9f:cb:40:b1:7b", - }, - }} - claim.Status.Devices = deviceStatus - tCtx.ExpectNoError(err, "add device status") - require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after adding device status") - - // Strip the RawExtension. SSA re-encodes it, which causes negligble differences that nonetheless break assert.Equal. - claim.Status.Devices[0].Data = nil - deviceStatus[0].Data = nil - claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).UpdateStatus(tCtx, claim, metav1.UpdateOptions{}) - tCtx.ExpectNoError(err, "add device status") - require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after stripping RawExtension") - - // Exercise SSA. - deviceStatusAC := resourceapiacv1beta2.AllocatedDeviceStatus(). - WithDriver("two"). - WithPool("global"). - WithDevice("another-device"). - WithNetworkData(resourceapiacv1beta2.NetworkDeviceData().WithInterfaceName("net-2")) - deviceStatus = append(deviceStatus, resourceapiv1beta2.AllocatedDeviceStatus{ - Driver: "two", - Pool: "global", - Device: "another-device", - NetworkData: &resourceapiv1beta2.NetworkDeviceData{ - InterfaceName: "net-2", - }, - }) - claimAC := resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace). - WithStatus(resourceapiacv1beta2.ResourceClaimStatus().WithDevices(deviceStatusAC)) - claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{ - Force: true, - FieldManager: "manager-1", - }) - tCtx.ExpectNoError(err, "apply device status two") - require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after applying device status two") - - deviceStatusAC = resourceapiacv1beta2.AllocatedDeviceStatus(). - WithDriver("three"). - WithPool("global"). - WithDevice("my-device"). - WithNetworkData(resourceapiacv1beta2.NetworkDeviceData().WithInterfaceName("net-3")) - deviceStatus = append(deviceStatus, resourceapiv1beta2.AllocatedDeviceStatus{ - Driver: "three", - Pool: "global", - Device: "my-device", - NetworkData: &resourceapiv1beta2.NetworkDeviceData{ - InterfaceName: "net-3", - }, - }) - claimAC = resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace). - WithStatus(resourceapiacv1beta2.ResourceClaimStatus().WithDevices(deviceStatusAC)) - claim, err = tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{ - Force: true, - FieldManager: "manager-2", - }) - tCtx.ExpectNoError(err, "apply device status three") - require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after applying device status three") - var buffer bytes.Buffer - encoder := json.NewEncoder(&buffer) - encoder.SetIndent(" ", " ") - tCtx.ExpectNoError(encoder.Encode(claim)) - tCtx.Logf("Final ResourceClaim:\n%s", buffer.String()) - - afterUpgrade = func() { - // Update one entry, remove the other. - deviceStatusAC := resourceapiac.AllocatedDeviceStatus(). - WithDriver("two"). - WithPool("global"). - WithDevice("another-device"). - WithNetworkData(resourceapiac.NetworkDeviceData().WithInterfaceName("yet-another-net")) - deviceStatus[1].NetworkData.InterfaceName = "yet-another-net" - claimAC := resourceapiac.ResourceClaim(claim.Name, claim.Namespace). - WithStatus(resourceapiac.ResourceClaimStatus().WithDevices(deviceStatusAC)) - claim, err := tCtx.Client().ResourceV1().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{ - Force: true, - FieldManager: "manager-1", - }) - tCtx.ExpectNoError(err, "update device status two") - - var deviceStatusV1 []resourceapi.AllocatedDeviceStatus - for _, status := range deviceStatus { - var statusV1 resourceapi.AllocatedDeviceStatus - tCtx.ExpectNoError(draapiv1beta2.Convert_v1beta2_AllocatedDeviceStatus_To_v1_AllocatedDeviceStatus(&status, &statusV1, nil)) - deviceStatusV1 = append(deviceStatusV1, statusV1) - } - require.Equal(tCtx, deviceStatusV1, claim.Status.Devices, "after updating device status two") - } - afterDowngrade = func() { - claimAC := resourceapiacv1beta2.ResourceClaim(claim.Name, claim.Namespace) - deviceStatus = deviceStatus[0:2] - claim, err := tCtx.Client().ResourceV1beta2().ResourceClaims(namespace).ApplyStatus(tCtx, claimAC, metav1.ApplyOptions{ - Force: true, - FieldManager: "manager-2", - }) - tCtx.ExpectNoError(err, "remove device status three") - require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after removing device status three") - - // The cleanup order is so that we have to run this explicitly now. - // The tCtx.CleanupCtx is more for the sake of completeness. - removeClaim(tCtx) - } - return -}