From 2e543d151b6f58ac6354a398282a43d4cca94cb5 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Wed, 15 Oct 2025 12:12:13 +0200 Subject: [PATCH 01/11] DRA device taints: convert unit test to synctest The immediate benefit is that the time required for running the package's unit test goes down from ~10 seconds (because of required real-world delays) to ~0.5 seconds (depending on the CPU performance of the host). It can also make writing tests easier because after a `Wait` there is no need for locking before accessing internal state (all background goroutines are known to be blocked waiting for the main goroutine). What somewhat ruins the perfect determinism is the polling for informer cache syncs: that can take an unknown number of loop iterations. Probably could be fixed by making the waiting block on channels (requires work in client-go). The only change required in the implementation is avoiding the sleep when deleting a pod failed for the last time in the loop (a useful, albeit minor improvement by itself): the test proceeds after having blocked that last Delete call, in which case synctest expects the background goroutine to exit without delay. 
--- .../device_taint_eviction.go | 10 +- .../device_taint_eviction_test.go | 493 +++++++++--------- 2 files changed, 261 insertions(+), 242 deletions(-) diff --git a/pkg/controller/devicetainteviction/device_taint_eviction.go b/pkg/controller/devicetainteviction/device_taint_eviction.go index 72562a423fa..103a0d1a161 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction.go @@ -218,9 +218,11 @@ type allocatedClaim struct { func (tc *Controller) deletePodHandler(c clientset.Interface, emitEventFunc func(tainteviction.NamespacedObject)) func(ctx context.Context, fireAt time.Time, args *tainteviction.WorkArgs) error { return func(ctx context.Context, fireAt time.Time, args *tainteviction.WorkArgs) error { - klog.FromContext(ctx).Info("Deleting pod", "pod", args.Object) var err error for i := 0; i < retries; i++ { + if i > 0 { + time.Sleep(10 * time.Millisecond) + } err = addConditionAndDeletePod(ctx, c, args.Object, &emitEventFunc) if apierrors.IsNotFound(err) { // Not a problem, the work is done. @@ -229,11 +231,13 @@ func (tc *Controller) deletePodHandler(c clientset.Interface, emitEventFunc func return nil } if err == nil { + podDeletionLatency := time.Since(fireAt) + // TODO: include more information why it was evicted. 
+ klog.FromContext(ctx).Info("Evicted pod by deleting it", "pod", args.Object, "latency", podDeletionLatency) tc.metrics.PodDeletionsTotal.Inc() - tc.metrics.PodDeletionsLatency.Observe(float64(time.Since(fireAt).Seconds())) + tc.metrics.PodDeletionsLatency.Observe(float64(podDeletionLatency.Seconds())) return nil } - time.Sleep(10 * time.Millisecond) } return err } diff --git a/pkg/controller/devicetainteviction/device_taint_eviction_test.go b/pkg/controller/devicetainteviction/device_taint_eviction_test.go index 1aee69fbfe6..b9024b58b28 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction_test.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction_test.go @@ -58,8 +58,7 @@ import ( ) // setup creates a controller which is ready to have its handle* methods called. -func setup(tb testing.TB) *testContext { - tCtx := ktesting.Init(tb) +func setup(tCtx ktesting.TContext) *testContext { fakeClientset := fake.NewSimpleClientset() informerFactory := informers.NewSharedInformerFactory(fakeClientset, 0) controller := New(fakeClientset, @@ -208,10 +207,17 @@ var ( resourceName = "my-resource" claimName = podName + "-" + resourceName namespace = "default" - taintTime = metav1.Now() // This cannot be a fixed value in the past, otherwise the "seconds since taint time" delta overflows. - taintKey = "example.com/taint" - taintValue = "something" - simpleSlice = st.MakeResourceSlice(nodeName, driver). + // taintTime is the start time of a synctest bubble. + // All tests run inside such a bubble and thus have a deterministic + // delta between this taint time and their current clock. + taintTime = metav1.Time{Time: time.Date(2000, 1, 1, 0, 0, 0, 0, time.UTC)} + taintKey = "example.com/taint" + taintValue = "something" + + // All slices use the internal format. + // For client-go they get converted back to the v1 API. + + simpleSlice = st.MakeResourceSlice(nodeName, driver). Device("instance"). Obj() slice = st.MakeResourceSlice(nodeName, driver). 
@@ -415,8 +421,6 @@ func listEvents(tCtx ktesting.TContext) []v1.Event { // state. The final state must be the same in all permutations. This simulates // the random order in which informer updates can be perceived. func TestHandlers(t *testing.T) { - t.Parallel() - for name, tc := range map[string]testCase{ "empty": {}, "populate-pools": { @@ -1083,12 +1087,16 @@ func TestHandlers(t *testing.T) { wantEvents: []*v1.Event{cancelPodEviction}, }, } { - t.Run(name, func(t *testing.T) { + tCtx := ktesting.Init(t) + + tCtx.Run(name, func(tCtx ktesting.TContext) { numEvents := len(tc.events) if numEvents <= 1 { // No permutations. - tContext := setup(t) - testHandlers(tContext, tc) + tCtx.SyncTest("", func(tCtx ktesting.TContext) { + tContext := setup(tCtx) + testHandlers(tContext, tc) + }) return } @@ -1104,8 +1112,8 @@ func TestHandlers(t *testing.T) { tc := tc tc.events = events name := strings.Trim(fmt.Sprintf("%v", permutation), "[]") - t.Run(name, func(t *testing.T) { - tContext := setup(t) + tCtx.SyncTest(name, func(tCtx ktesting.TContext) { + tContext := setup(tCtx) testHandlers(tContext, tc) }) return @@ -1232,7 +1240,7 @@ func newTestController(tCtx ktesting.TContext, clientSet *fake.Clientset) *Contr informerFactory.Resource().V1().DeviceClasses(), "device-taint-eviction", ) - controller.metrics = metrics.New(300 /* one large initial bucket for testing */) + controller.metrics = metrics.New(300 /* one large initial bucket for testing */) // TODO: inside a synctest bubble we should have deterministic delays and shouldn't need this trick. The remaining uncertainty comes from polling for informer cache sync. // Always log, not matter what the -v value is. 
logger := klog.FromContext(tCtx) controller.eventLogger = &logger @@ -1240,9 +1248,16 @@ func newTestController(tCtx ktesting.TContext, clientSet *fake.Clientset) *Contr informerFactory.Start(tCtx.Done()) tCtx.Cleanup(informerFactory.Shutdown) - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) int32 { - return numWatches.Load() - }).WithTimeout(5*time.Second).Should(gomega.Equal(int32(5)), "All watches should be registered.") + tCtx.Log("starting to wait for watches") + if tCtx.IsSyncTest() { + tCtx.Wait() + require.Equal(tCtx, int32(5), numWatches.Load(), "All watches should be registered.") + } else { + ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) int32 { + return numWatches.Load() + }).WithTimeout(5*time.Second).Should(gomega.Equal(int32(5)), "All watches should be registered.") + } + tCtx.Log("done waiting for watches") return controller } @@ -1284,10 +1299,11 @@ device_taint_eviction_controller_pod_deletions_total %[1]d // This scenario is the same as "evict-pod-resourceclaim" above. It also covers all // event handlers by leading to the same end state through several different combinations // of initial objects and add/update/delete calls. +// +// This runs in a bubble (https://pkg.go.dev/testing/synctest), so we can wait for goroutine +// activity to settle down and then check the state. func TestEviction(t *testing.T) { tCtx := ktesting.Init(t) - tCtx.Parallel() - do := func(tCtx ktesting.TContext, what string, action func(tCtx ktesting.TContext) error) { tCtx.Log(what) err := action(tCtx) @@ -1376,27 +1392,22 @@ func TestEviction(t *testing.T) { }, }, } { - tCtx.Run(name, func(tCtx ktesting.TContext) { - tCtx.Parallel() + tCtx.SyncTest(name, func(tCtx ktesting.TContext) { + start := time.Now() fakeClientset := fake.NewSimpleClientset(tt.initialObjects...) 
tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) - var mutex sync.Mutex var podUpdates int var updatedPod *v1.Pod var podDeletions int fakeClientset.PrependReactor("patch", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() podUpdates++ podName := action.(core.PatchAction).GetName() assert.Equal(t, podWithClaimName.Name, podName, "name of patched pod") return false, nil, nil }) fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() podDeletions++ podName := action.(core.DeleteAction).GetName() assert.Equal(t, podWithClaimName.Name, podName, "name of deleted pod") @@ -1420,18 +1431,37 @@ func TestEviction(t *testing.T) { }() // Eventually the controller should have synced it's informers. - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) bool { - return controller.hasSynced.Load() > 0 - }).WithTimeout(30 * time.Second).Should(gomega.BeTrueBecause("controller synced")) - if tt.afterSync != nil { - tt.afterSync(tCtx) + if false { + // This feels like it should work (controller should run until it has started up, then block durably), but it doesn't. + // Time progresses while the controller is blocked in cache.WaitForNamedCacheSyncWithContext, so this is + // probably a good place to start looking. + // TODO: make "wait for cache sync" block on a channel. Alternatively, use a context and let `context.Cause` + // report success or failure (might be too hacky). + tCtx.Wait() + if controller.hasSynced.Load() <= 0 { + tCtx.Fatal("controller should have synced") + } + } else { + ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) bool { + return controller.hasSynced.Load() > 0 + }).WithTimeout(30 * time.Second).Should(gomega.BeTrueBecause("controller synced")) + if tt.afterSync != nil { + tt.afterSync(tCtx) + } } // Eventually the pod gets deleted (= evicted). 
- ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) bool { - _, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) - return apierrors.IsNotFound(err) - }).WithTimeout(30 * time.Second).Should(gomega.BeTrueBecause("pod evicted")) + // We can wait for the controller to be idle. + tCtx.Wait() + _, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) + switch { + case err == nil: + tCtx.Fatalf("Pod should have been deleted, it still exists") + case apierrors.IsNotFound(err): + // Okay. + default: + tCtx.Fatalf("Retrieving pod failed: %v", err) + } pod := pod.DeepCopy() pod.Status.Conditions = []v1.PodCondition{{ @@ -1446,24 +1476,17 @@ func TestEviction(t *testing.T) { // Shortly after deletion we should also see updated metrics. // This is the last thing the controller does for a pod. - // However, actually creating the event on the server is asynchronous, - // so we also have to wait for that. - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error { - gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchDeletionEvent()) - return testPodDeletionsMetrics(controller, 1) - }).WithTimeout(30*time.Second).Should(gomega.Succeed(), "pod eviction done") + // Because of Wait we know that all goroutines are durably blocked and won't + // wake up again to change the metrics => no need for a "Consistently"! + gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchDeletionEvent()) + tCtx.ExpectNoError(testPodDeletionsMetrics(controller, 1), "pod eviction done") - // We also don't want any other events, in particular not a cancellation event - // because the pod deletion was observed or another occurrence of the same event. 
- ktesting.Consistently(tCtx, func(tCtx ktesting.TContext) error { - mutex.Lock() - defer mutex.Unlock() - assert.Equal(tCtx, 1, podUpdates, "number of pod update calls") - assert.Equal(tCtx, 1, podDeletions, "number of pod delete calls") - gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchDeletionEvent()) - tCtx.ExpectNoError(testPodDeletionsMetrics(controller, 1)) - return nil - }).WithTimeout(5 * time.Second).Should(gomega.Succeed()) + // Depending on timing, the "wait for cache synced" polling loops may sleep a bit more or less, + // so there is a certain delta of uncertainty about the overall duration. Without that polling + // we probably could assert zero runtime here. + tCtx.Logf("eviction duration: %s", time.Since(start)) + delta := time.Second + require.WithinRange(tCtx, time.Now(), start, start.Add(delta), "time to evict pod") }) } } @@ -1472,10 +1495,8 @@ func TestEviction(t *testing.T) { // or removes the slice. Either way, eviction gets cancelled. func TestCancelEviction(t *testing.T) { tCtx := ktesting.Init(t) - tCtx.Parallel() - - tCtx.Run("pod-deleted", func(tCtx ktesting.TContext) { testCancelEviction(tCtx, true) }) - tCtx.Run("slice-deleted", func(tCtx ktesting.TContext) { testCancelEviction(tCtx, false) }) + tCtx.SyncTest("pod-deleted", func(tCtx ktesting.TContext) { testCancelEviction(tCtx, true) }) + tCtx.SyncTest("slice-deleted", func(tCtx ktesting.TContext) { testCancelEviction(tCtx, false) }) } func testCancelEviction(tCtx ktesting.TContext, deletePod bool) { @@ -1557,15 +1578,13 @@ func testCancelEviction(tCtx ktesting.TContext, deletePod bool) { if !deletePod { ktesting.Eventually(tCtx, listEvents).WithTimeout(30 * time.Second).Should(matchCancellationEvent()) } - ktesting.Consistently(tCtx, func(tCtx ktesting.TContext) error { - matchEvents := matchCancellationEvent() - if deletePod { - matchEvents = gomega.BeEmpty() - } - gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchEvents) - 
tCtx.ExpectNoError(testPodDeletionsMetrics(controller, 0)) - return nil - }).WithTimeout(5 * time.Second).Should(gomega.Succeed()) + tCtx.Wait() + matchEvents := matchCancellationEvent() + if deletePod { + matchEvents = gomega.BeEmpty() + } + gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchEvents) + tCtx.ExpectNoError(testPodDeletionsMetrics(controller, 0)) } // TestParallelPodDeletion covers the scenario that a pod gets deleted right before @@ -1574,230 +1593,226 @@ func TestParallelPodDeletion(t *testing.T) { tCtx := ktesting.Init(t) tCtx.Parallel() - // This scenario is the same as "evict-pod-resourceclaim" above. - pod := podWithClaimName.DeepCopy() - fakeClientset := fake.NewSimpleClientset( - sliceTainted, - slice2, - inUseClaim, - pod, - ) - tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) + tCtx.SyncTest("", func(tCtx ktesting.TContext) { + // This scenario is the same as "evict-pod-resourceclaim" above. + pod := podWithClaimName.DeepCopy() + fakeClientset := fake.NewSimpleClientset( + sliceTainted, + slice2, + inUseClaim, + pod, + ) + tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) - pod, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) - require.NoError(tCtx, err, "get pod before eviction") - assert.Equal(tCtx, podWithClaimName, pod, "test pod") + pod, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) + require.NoError(tCtx, err, "get pod before eviction") + assert.Equal(tCtx, podWithClaimName, pod, "test pod") - var mutex sync.Mutex - var podGets int - var podDeletions int + var mutex sync.Mutex + var podGets int + var podDeletions int - fakeClientset.PrependReactor("get", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() - podGets++ - podName := action.(core.GetAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of patched pod") + 
fakeClientset.PrependReactor("get", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { + mutex.Lock() + defer mutex.Unlock() + podGets++ + podName := action.(core.GetAction).GetName() + assert.Equal(t, podWithClaimName.Name, podName, "name of patched pod") - // This gets called directly before eviction. Pretend that it is deleted. - err = fakeClientset.Tracker().Delete(v1.SchemeGroupVersion.WithResource("pods"), pod.Namespace, pod.Name) - assert.NoError(tCtx, err, "delete pod") //nolint:testifylint // Here recording an unknown error and continuing is okay. - return true, nil, apierrors.NewNotFound(v1.SchemeGroupVersion.WithResource("pods").GroupResource(), pod.Name) - }) - fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() - podDeletions++ - podName := action.(core.DeleteAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of deleted pod") - return false, nil, nil - }) - controller := newTestController(tCtx, fakeClientset) + // This gets called directly before eviction. Pretend that it is deleted. + err = fakeClientset.Tracker().Delete(v1.SchemeGroupVersion.WithResource("pods"), pod.Namespace, pod.Name) + assert.NoError(tCtx, err, "delete pod") //nolint:testifylint // Here recording an unknown error and continuing is okay. 
+ return true, nil, apierrors.NewNotFound(v1.SchemeGroupVersion.WithResource("pods").GroupResource(), pod.Name) + }) + fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { + mutex.Lock() + defer mutex.Unlock() + podDeletions++ + podName := action.(core.DeleteAction).GetName() + assert.Equal(t, podWithClaimName.Name, podName, "name of deleted pod") + return false, nil, nil + }) + controller := newTestController(tCtx, fakeClientset) - var wg sync.WaitGroup - defer func() { - tCtx.Log("Waiting for goroutine termination...") - tCtx.Cancel("time to stop") - wg.Wait() - }() - wg.Add(1) - go func() { - defer wg.Done() - assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") - }() + var wg sync.WaitGroup + defer func() { + tCtx.Log("Waiting for goroutine termination...") + tCtx.Cancel("time to stop") + wg.Wait() + }() + wg.Add(1) + go func() { + defer wg.Done() + assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") + }() - // Eventually the pod gets deleted, in this test by us. - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) bool { - mutex.Lock() - defer mutex.Unlock() - return podGets >= 1 - }).WithTimeout(30 * time.Second).Should(gomega.BeTrueBecause("pod eviction started")) + // Eventually the pod gets deleted, in this test by us. + ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) bool { + mutex.Lock() + defer mutex.Unlock() + return podGets >= 1 + }).WithTimeout(30 * time.Second).Should(gomega.BeTrueBecause("pod eviction started")) - // We don't want any events. - ktesting.Consistently(tCtx, func(tCtx ktesting.TContext) error { - mutex.Lock() - defer mutex.Unlock() + // We don't want any events. 
+ tCtx.Wait() assert.Equal(tCtx, 1, podGets, "number of pod get calls") assert.Equal(tCtx, 0, podDeletions, "number of pod delete calls") gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(gomega.BeEmpty()) tCtx.ExpectNoError(testPodDeletionsMetrics(controller, 0)) - return nil - }).WithTimeout(5 * time.Second).Should(gomega.Succeed()) + }) } // TestRetry covers the scenario that an eviction attempt must be retried. func TestRetry(t *testing.T) { tCtx := ktesting.Init(t) - tCtx.Parallel() - // This scenario is the same as "evict-pod-resourceclaim" above. - pod := podWithClaimName.DeepCopy() - fakeClientset := fake.NewSimpleClientset( - sliceTainted, - slice2, - inUseClaim, - pod, - ) - tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) + tCtx.SyncTest("", func(tCtx ktesting.TContext) { + // This scenario is the same as "evict-pod-resourceclaim" above. + pod := podWithClaimName.DeepCopy() + fakeClientset := fake.NewSimpleClientset( + sliceTainted, + slice2, + inUseClaim, + pod, + ) + tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) - pod, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) - require.NoError(tCtx, err, "get pod before eviction") - assert.Equal(tCtx, podWithClaimName, pod, "test pod") + pod, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) + require.NoError(tCtx, err, "get pod before eviction") + assert.Equal(tCtx, podWithClaimName, pod, "test pod") - var mutex sync.Mutex - var podGets int - var podDeletions int + var mutex sync.Mutex + var podGets int + var podDeletions int - fakeClientset.PrependReactor("get", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() - podGets++ - podName := action.(core.GetAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of patched pod") + fakeClientset.PrependReactor("get", "pods", func(action core.Action) 
(handled bool, ret runtime.Object, err error) { + mutex.Lock() + defer mutex.Unlock() + podGets++ + podName := action.(core.GetAction).GetName() + assert.Equal(t, podWithClaimName.Name, podName, "name of patched pod") - // This gets called directly before eviction. Pretend that there is an intermittent error. - if podGets == 1 { - return true, nil, apierrors.NewInternalError(errors.New("fake error")) - } - return false, nil, nil - }) - fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() - podDeletions++ - podName := action.(core.DeleteAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of deleted pod") - return false, nil, nil - }) - controller := newTestController(tCtx, fakeClientset) + // This gets called directly before eviction. Pretend that there is an intermittent error. + if podGets == 1 { + return true, nil, apierrors.NewInternalError(errors.New("fake error")) + } + return false, nil, nil + }) + fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { + mutex.Lock() + defer mutex.Unlock() + podDeletions++ + podName := action.(core.DeleteAction).GetName() + assert.Equal(t, podWithClaimName.Name, podName, "name of deleted pod") + return false, nil, nil + }) + controller := newTestController(tCtx, fakeClientset) - var wg sync.WaitGroup - defer func() { - t.Log("Waiting for goroutine termination...") - tCtx.Cancel("time to stop") - wg.Wait() - }() - wg.Add(1) - go func() { - defer wg.Done() - assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") - }() + var wg sync.WaitGroup + defer func() { + t.Log("Waiting for goroutine termination...") + tCtx.Cancel("time to stop") + wg.Wait() + }() + wg.Add(1) + go func() { + defer wg.Done() + assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") + }() - // Eventually the pod gets deleted and the 
event is recorded. - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error { - gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchDeletionEvent()) - return testPodDeletionsMetrics(controller, 1) - }).WithTimeout(30*time.Second).Should(gomega.Succeed(), "pod eviction done") + // Eventually the pod gets deleted and the event is recorded. + ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) error { + gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchDeletionEvent()) + return testPodDeletionsMetrics(controller, 1) + }).WithTimeout(30*time.Second).Should(gomega.Succeed(), "pod eviction done") - // Now we can check the API calls. - ktesting.Consistently(tCtx, func(tCtx ktesting.TContext) error { - mutex.Lock() - defer mutex.Unlock() + // Now we can check the API calls. + tCtx.Wait() assert.Equal(tCtx, 2, podGets, "number of pod get calls") assert.Equal(tCtx, 1, podDeletions, "number of pod delete calls") gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchDeletionEvent()) tCtx.ExpectNoError(testPodDeletionsMetrics(controller, 1)) - return nil - }).WithTimeout(5 * time.Second).Should(gomega.Succeed()) + }) } // TestRetry covers the scenario that an eviction attempt fails. func TestEvictionFailure(t *testing.T) { tCtx := ktesting.Init(t) - tCtx.Parallel() - // This scenario is the same as "evict-pod-resourceclaim" above. - pod := podWithClaimName.DeepCopy() - fakeClientset := fake.NewSimpleClientset( - sliceTainted, - slice2, - inUseClaim, - pod, - ) - tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) + tCtx.SyncTest("", func(tCtx ktesting.TContext) { + // This scenario is the same as "evict-pod-resourceclaim" above. 
+ pod := podWithClaimName.DeepCopy() + fakeClientset := fake.NewSimpleClientset( + sliceTainted, + slice2, + inUseClaim, + pod, + ) + tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) - pod, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) - require.NoError(tCtx, err, "get pod before eviction") - assert.Equal(tCtx, podWithClaimName, pod, "test pod") + pod, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) + require.NoError(tCtx, err, "get pod before eviction") + assert.Equal(tCtx, podWithClaimName, pod, "test pod") - var mutex sync.Mutex - var podGets int - var podDeletions int + var mutex sync.Mutex + var podGets int + var podDeletions int - fakeClientset.PrependReactor("get", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() - podGets++ - podName := action.(core.GetAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of patched pod") - return false, nil, nil - }) - fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() - podDeletions++ - podName := action.(core.DeleteAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of deleted pod") - return true, nil, apierrors.NewInternalError(errors.New("fake error")) - }) - controller := newTestController(tCtx, fakeClientset) + fakeClientset.PrependReactor("get", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { + mutex.Lock() + defer mutex.Unlock() + podGets++ + podName := action.(core.GetAction).GetName() + assert.Equal(t, podWithClaimName.Name, podName, "name of patched pod") + return false, nil, nil + }) + fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { + mutex.Lock() + defer mutex.Unlock() + podDeletions++ 
+ podName := action.(core.DeleteAction).GetName() + assert.Equal(t, podWithClaimName.Name, podName, "name of deleted pod") + return true, nil, apierrors.NewInternalError(errors.New("fake error")) + }) + controller := newTestController(tCtx, fakeClientset) - var wg sync.WaitGroup - defer func() { - t.Log("Waiting for goroutine termination...") - tCtx.Cancel("time to stop") - wg.Wait() - }() - wg.Add(1) - go func() { - defer wg.Done() - assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") - }() + var wg sync.WaitGroup + defer func() { + t.Log("Waiting for goroutine termination...") + tCtx.Cancel("time to stop") + wg.Wait() + }() + wg.Add(1) + go func() { + defer wg.Done() + assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") + }() - // Eventually deletion is attempted a few times. - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) int { - mutex.Lock() - defer mutex.Unlock() - return podDeletions - }).WithTimeout(30*time.Second).Should(gomega.BeNumerically(">=", retries), "pod eviction failed") + // Block until eviction has started. + // Eventually deletion is attempted a few times. + ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) int { + mutex.Lock() + defer mutex.Unlock() + return podDeletions + }).WithTimeout(30*time.Second).Should(gomega.BeNumerically(">=", retries), "pod eviction failed") - // Now we can check the API calls. - ktesting.Consistently(tCtx, func(tCtx ktesting.TContext) error { - mutex.Lock() - defer mutex.Unlock() + // Now we can check the API calls. + // The background goroutines must be done when Wait returns, + // otherwise Wait wouldn't return. 
+ tCtx.Wait() assert.Equal(tCtx, retries, podGets, "number of pod get calls") assert.Equal(tCtx, retries, podDeletions, "number of pod delete calls") gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchDeletionEvent()) tCtx.ExpectNoError(testPodDeletionsMetrics(controller, 0)) - return nil - }).WithTimeout(5 * time.Second).Should(gomega.Succeed()) + }) } // BenchTaintUntaint checks the full flow of detecting a claim as // tainted because of a new DeviceTaintRule, starting to evict its // consumer, and then undoing that when the DeviceTaintRule is removed. func BenchmarkTaintUntaint(b *testing.B) { - tContext := setup(b) + tCtx := ktesting.Init(b) + tContext := setup(tCtx) podStore := tContext.informerFactory.Core().V1().Pods().Informer().GetStore() // No output, comment out if output is desired. tContext.Controller.eventLogger = nil From fee14ffca2099b2d7a0bfc5761691b363eac4de1 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 18 Sep 2025 19:44:06 +0200 Subject: [PATCH 02/11] DRA API: device taints 1.35 This raises the number of allowed taints per device to 16 by lowering the number of allowed devices to 64 per ResourceSlice if (and only if!) taints are used. "effect: None" and DeviceTaintRule status with conditions get added to support giving feedback to admins. Instead of merely adding the new effect value, this also changes validation of the enum so that unknown values are valid if they were already stored. This will simplify adding new effects in the future because validation won't fail for them after a downgrade. Consumers must treat them like this new None effect, i.e. ignore them. 
--- pkg/apis/resource/types.go | 75 +++++- pkg/apis/resource/validation/validation.go | 83 +++++- .../validation_devicetaintrule_test.go | 35 ++- .../validation_resourceclaim_test.go | 2 +- .../validation_resourceslice_test.go | 59 ++++- .../devicetaintrule/storage/storage.go | 48 +++- .../devicetaintrule/storage/storage_test.go | 59 ++++- .../resource/devicetaintrule/strategy.go | 92 +++++-- .../resource/devicetaintrule/strategy_test.go | 250 +++++++++++++++--- .../resource/resourceclaim/strategy.go | 4 + .../resource/rest/storage_resource.go | 3 +- staging/src/k8s.io/api/resource/v1/types.go | 28 +- .../src/k8s.io/api/resource/v1alpha3/types.go | 66 ++++- .../src/k8s.io/api/resource/v1beta1/types.go | 28 +- .../src/k8s.io/api/resource/v1beta2/types.go | 28 +- .../client-go/applyconfigurations/utils.go | 2 + .../apiserver/apply/status_test.go | 1 + test/integration/dra/dra_test.go | 37 +-- test/integration/dra/objects.go | 43 +-- 19 files changed, 793 insertions(+), 150 deletions(-) diff --git a/pkg/apis/resource/types.go b/pkg/apis/resource/types.go index dd9e76d8bdc..016e7c236de 100644 --- a/pkg/apis/resource/types.go +++ b/pkg/apis/resource/types.go @@ -145,7 +145,7 @@ type ResourceSliceSpec struct { // Devices lists some or all of the devices in this pool. // - // Must not have more than 128 entries. + // Must not have more than 128 entries. If any device uses taints the limit is 64. // // +optional // +listType=atomic @@ -243,6 +243,7 @@ type ResourcePool struct { const ResourceSliceMaxSharedCapacity = 128 const ResourceSliceMaxDevices = 128 +const ResourceSliceMaxDevicesWithTaints = 64 const PoolNameMaxLength = validation.DNS1123SubdomainMaxLength // Same as for a single node name. const BindingConditionsMaxSize = 4 const BindingFailureConditionsMaxSize = 4 @@ -326,7 +327,9 @@ type Device struct { // If specified, these are the driver-defined taints. // - // The maximum number of taints is 4. + // The maximum number of taints is 16. 
If taints are set for + // any device in a ResourceSlice, then the maximum number of + // allowed devices per ResourceSlice is 64 instead of 128. // // This is an alpha field and requires enabling the DRADeviceTaints // feature gate. @@ -601,8 +604,8 @@ type DeviceAttribute struct { // DeviceAttributeMaxValueLength is the maximum length of a string or version attribute value. const DeviceAttributeMaxValueLength = 64 -// DeviceTaintsMaxLength is the maximum number of taints per device. -const DeviceTaintsMaxLength = 4 +// DeviceTaintsMaxLength is the maximum number of taints per Device. +const DeviceTaintsMaxLength = 16 // The device this taint is attached to has the "effect" on // any claim which does not tolerate the taint and, through the claim, @@ -622,8 +625,10 @@ type DeviceTaint struct { // The effect of the taint on claims that do not tolerate the taint // and through such claims on the pods using them. - // Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for - // nodes is not valid here. + // + // Valid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for + // nodes is not valid here. More effects may get added in the future. + // Consumers must treat unknown effects like None. // // +required Effect DeviceTaintEffect @@ -632,6 +637,14 @@ type DeviceTaint struct { // // Implementing PreferNoSchedule would depend on a scoring solution for DRA. // It might get added as part of that. + // + // A possible future new effect is NoExecuteWithPodDisruptionBudget: + // honor the pod disruption budget instead of simply deleting pods. + // This is currently undecided, it could also be a separate field. + // + // Validation must be prepared to allow unknown enums in stored objects, + // which will enable adding new enums within a single release without + // ratcheting. // TimeAdded represents the time at which the taint was added. // Added automatically during create or update if not set. 
@@ -650,6 +663,9 @@ type DeviceTaint struct { type DeviceTaintEffect string const ( + // No effect, the taint is purely informational. + DeviceTaintEffectNone DeviceTaintEffect = "None" + // Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, // but allow all pods submitted to Kubelet without going through the scheduler // to start, and allow all already-running pods to continue running. @@ -1876,18 +1892,16 @@ type DeviceTaintRule struct { // Changing the spec automatically increments the metadata.generation number. Spec DeviceTaintRuleSpec - // ^^^ - // A spec gets added because adding a status seems likely. - // Such a status could provide feedback on applying the - // eviction and/or statistics (number of matching devices, - // affected allocated claims, pods remaining to be evicted, - // etc.). + // Status provides information about what was requested in the spec. + // + // +optional + Status DeviceTaintRuleStatus } // DeviceTaintRuleSpec specifies the selector and one taint. type DeviceTaintRuleSpec struct { // DeviceSelector defines which device(s) the taint is applied to. - // All selector criteria must be satified for a device to + // All selector criteria must be satisfied for a device to // match. The empty selector matches all devices. Without // a selector, no devices are matches. // @@ -1947,6 +1961,41 @@ type DeviceTaintSelector struct { Selectors []DeviceSelector } +// DeviceTaintRuleStatus provides information about an on-going pod eviction. +type DeviceTaintRuleStatus struct { + // Conditions provide information about the state of the DeviceTaintRule + // and the cluster at some point in time, + // in a machine-readable and human-readable format. 
+ // + // The following condition is currently defined as part of this API, more may + // get added: + // - Type: EvictionInProgress + // - Status: True if there are currently pods which need to be evicted, False otherwise + // (includes the effects which don't cause eviction). + // - Reason: not specified, may change + // - Message: includes information about number of pending pods and already evicted pods + // in a human-readable format, updated periodically, may change + // + // For `effect: None`, the condition above gets set once for each change to + // the spec, with the message containing information about what would happen + // if the effect was `NoExecute`. This feedback can be used to decide whether + // changing the effect to `NoExecute` will work as intended. It only gets + // set once to avoid having to constantly update the status. + // + // Must have 8 or fewer entries. + // + // +optional + // +listType=map + // +listMapKey=type + Conditions []metav1.Condition +} + +// DeviceTaintRuleStatusMaxConditions is the maximum number of conditions in DeviceTaintRuleStatus. +const DeviceTaintRuleStatusMaxConditions = 8 + +// DeviceTaintConditionEvictionInProgress is the publicly documented condition type for the DeviceTaintRuleStatus. +const DeviceTaintConditionEvictionInProgress = "EvictionInProgress" + // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object // DeviceTaintRuleList is a collection of DeviceTaintRules. 
diff --git a/pkg/apis/resource/validation/validation.go b/pkg/apis/resource/validation/validation.go index 2d5529c9532..313f829c37e 100644 --- a/pkg/apis/resource/validation/validation.go +++ b/pkg/apis/resource/validation/validation.go @@ -704,9 +704,14 @@ func validateResourceSliceSpec(spec, oldSpec *resource.ResourceSliceSpec, fldPat } sharedCounterToCounterNames := gatherSharedCounterCounterNames(spec.SharedCounters) - allErrs = append(allErrs, validateSet(spec.Devices, resource.ResourceSliceMaxDevices, + maxDevices := resource.ResourceSliceMaxDevices + if haveDeviceTaints(spec) { + maxDevices = resource.ResourceSliceMaxDevicesWithTaints + } + allErrs = append(allErrs, validateSet(spec.Devices, maxDevices, func(device resource.Device, fldPath *field.Path) field.ErrorList { - return validateDevice(device, fldPath, sharedCounterToCounterNames, spec.PerDeviceNodeSelection) + oldDevice := lookupDevice(oldSpec, device.Name) + return validateDevice(device, oldDevice, fldPath, sharedCounterToCounterNames, spec.PerDeviceNodeSelection) }, func(device resource.Device) string { return device.Name @@ -740,6 +745,32 @@ func validateResourceSliceSpec(spec, oldSpec *resource.ResourceSliceSpec, fldPat return allErrs } +func haveDeviceTaints(spec *resource.ResourceSliceSpec) bool { + if spec == nil { + return false + } + + for _, device := range spec.Devices { + if len(device.Taints) > 0 { + return true + } + } + return false +} + +func lookupDevice(spec *resource.ResourceSliceSpec, deviceName string) *resource.Device { + if spec == nil { + return nil + } + for i := range spec.Devices { + device := &spec.Devices[i] + if device.Name == deviceName { + return device + } + } + return nil +} + func validateCounterSet(counterSet resource.CounterSet, fldPath *field.Path) field.ErrorList { var allErrs field.ErrorList if counterSet.Name == "" { @@ -782,7 +813,7 @@ func validateResourcePool(pool resource.ResourcePool, fldPath *field.Path) field return allErrs } -func 
validateDevice(device resource.Device, fldPath *field.Path, sharedCounterToCounterNames map[string]sets.Set[string], perDeviceNodeSelection *bool) field.ErrorList { +func validateDevice(device resource.Device, oldDevice *resource.Device, fldPath *field.Path, sharedCounterToCounterNames map[string]sets.Set[string], perDeviceNodeSelection *bool) field.ErrorList { var allErrs field.ErrorList allowMultipleAllocations := device.AllowMultipleAllocations != nil && *device.AllowMultipleAllocations allErrs = append(allErrs, validateDeviceName(device.Name, fldPath.Child("name"))...) @@ -799,7 +830,15 @@ func validateDevice(device resource.Device, fldPath *field.Path, sharedCounterTo } else { allErrs = append(allErrs, validateMap(device.Capacity, -1, attributeAndCapacityMaxKeyLength, validateQualifiedName, validateSingleAllocatableDeviceCapacity, fldPath.Child("capacity"))...) } - allErrs = append(allErrs, validateSlice(device.Taints, resource.DeviceTaintsMaxLength, validateDeviceTaint, fldPath.Child("taints"))...) + // If the entire set is the same as before then validation can be skipped. + // We could also do the DeepEqual on the entire spec, but here it is a bit cheaper. + if oldDevice == nil || !apiequality.Semantic.DeepEqual(oldDevice.Taints, device.Taints) { + allErrs = append(allErrs, validateSlice(device.Taints, resource.DeviceTaintsMaxLength, + func(taint resource.DeviceTaint, fldPath *field.Path) field.ErrorList { + return validateDeviceTaint(taint, nil, fldPath) + }, + fldPath.Child("taints"))...) + } allErrs = append(allErrs, validateSet(device.ConsumesCounters, -1, validateDeviceCounterConsumption, @@ -1342,7 +1381,11 @@ func validateDeviceTaintRuleSpec(spec, oldSpec *resource.DeviceTaintRuleSpec, fl oldFilter = oldSpec.DeviceSelector // +k8s:verify-mutation:reason=clone } allErrs = append(allErrs, validateDeviceTaintSelector(spec.DeviceSelector, oldFilter, fldPath.Child("deviceSelector"))...) 
- allErrs = append(allErrs, validateDeviceTaint(spec.Taint, fldPath.Child("taint"))...) + var oldTaint *resource.DeviceTaint + if oldSpec != nil { + oldTaint = &oldSpec.Taint // +k8s:verify-mutation:reason=clone + } + allErrs = append(allErrs, validateDeviceTaint(spec.Taint, oldTaint, fldPath.Child("taint"))...) return allErrs } @@ -1382,20 +1425,22 @@ func validateDeviceTaintSelector(filter, oldFilter *resource.DeviceTaintSelector } var validDeviceTolerationOperators = []resource.DeviceTolerationOperator{resource.DeviceTolerationOpEqual, resource.DeviceTolerationOpExists} -var validDeviceTaintEffects = sets.New(resource.DeviceTaintEffectNoSchedule, resource.DeviceTaintEffectNoExecute) +var validDeviceTaintEffects = sets.New(resource.DeviceTaintEffectNoSchedule, resource.DeviceTaintEffectNoExecute, resource.DeviceTaintEffectNone) -func validateDeviceTaint(taint resource.DeviceTaint, fldPath *field.Path) field.ErrorList { +func validateDeviceTaint(taint resource.DeviceTaint, oldTaint *resource.DeviceTaint, fldPath *field.Path) field.ErrorList { var allErrs field.ErrorList allErrs = append(allErrs, metav1validation.ValidateLabelName(taint.Key, fldPath.Child("key"))...) // Includes checking for non-empty. if taint.Value != "" { allErrs = append(allErrs, validateLabelValue(taint.Value, fldPath.Child("value"))...) } - switch { - case taint.Effect == "": - allErrs = append(allErrs, field.Required(fldPath.Child("effect"), "").MarkCoveredByDeclarative()) // Required in a taint. - case !validDeviceTaintEffects.Has(taint.Effect): - allErrs = append(allErrs, field.NotSupported(fldPath.Child("effect"), taint.Effect, sets.List(validDeviceTaintEffects)).MarkCoveredByDeclarative()) + if oldTaint == nil || oldTaint.Effect != taint.Effect { + switch { + case taint.Effect == "": + allErrs = append(allErrs, field.Required(fldPath.Child("effect"), "").MarkCoveredByDeclarative()) // Required in a taint. 
+ case !validDeviceTaintEffects.Has(taint.Effect): + allErrs = append(allErrs, field.NotSupported(fldPath.Child("effect"), taint.Effect, sets.List(validDeviceTaintEffects)).MarkCoveredByDeclarative()) + } } return allErrs @@ -1478,3 +1523,17 @@ func validateDeviceBindingParameters(bindingConditions, bindingFailureConditions return allErrs } + +// ValidateDeviceTaintRuleStatusUpdate tests if a DeviceTaintRule status update is valid. +func ValidateDeviceTaintRuleStatusUpdate(rule, oldRule *resource.DeviceTaintRule) field.ErrorList { + var allErrs field.ErrorList + + fldPath := field.NewPath("status") + allErrs = corevalidation.ValidateObjectMetaUpdate(&rule.ObjectMeta, &oldRule.ObjectMeta, field.NewPath("metadata")) // Covers invalid name changes. + allErrs = append(allErrs, metav1validation.ValidateConditions(rule.Status.Conditions, fldPath.Child("conditions"))...) + if len(rule.Status.Conditions) > resource.DeviceTaintRuleStatusMaxConditions { + allErrs = append(allErrs, field.TooMany(fldPath.Child("conditions"), len(rule.Status.Conditions), resource.DeviceTaintRuleStatusMaxConditions)) + } + + return allErrs +} diff --git a/pkg/apis/resource/validation/validation_devicetaintrule_test.go b/pkg/apis/resource/validation/validation_devicetaintrule_test.go index 089f738cfc5..48c606c388b 100644 --- a/pkg/apis/resource/validation/validation_devicetaintrule_test.go +++ b/pkg/apis/resource/validation/validation_devicetaintrule_test.go @@ -283,6 +283,7 @@ func TestValidateDeviceTaint(t *testing.T) { return claim }(), }, + // Minimal tests for DeviceTaint. Full coverage of validateDeviceTaint is in ResourceSlice test. 
"valid-taint": { taintRule: func() *resourceapi.DeviceTaintRule { claim := testDeviceTaintRule(goodName, validDeviceTaintRuleSpec) @@ -294,20 +295,33 @@ func TestValidateDeviceTaint(t *testing.T) { return claim }(), }, - "invalid-taint": { + "required-taint": { wantFailures: field.ErrorList{ field.Required(field.NewPath("spec", "taint", "effect"), "").MarkCoveredByDeclarative(), }, taintRule: func() *resourceapi.DeviceTaintRule { claim := testDeviceTaintRule(goodName, validDeviceTaintRuleSpec) claim.Spec.Taint = resourceapi.DeviceTaint{ - // Minimal test. Full coverage of validateDeviceTaint is in ResourceSlice test. Key: goodName, Value: goodName, } return claim }(), }, + "invalid-taint": { + wantFailures: field.ErrorList{ + field.NotSupported(field.NewPath("spec", "taint", "effect"), resourceapi.DeviceTaintEffect("some-other-effect"), []resourceapi.DeviceTaintEffect{resourceapi.DeviceTaintEffectNoExecute, resourceapi.DeviceTaintEffectNoSchedule, resourceapi.DeviceTaintEffectNone}).MarkCoveredByDeclarative(), + }, + taintRule: func() *resourceapi.DeviceTaintRule { + claim := testDeviceTaintRule(goodName, validDeviceTaintRuleSpec) + claim.Spec.Taint = resourceapi.DeviceTaint{ + Effect: "some-other-effect", + Key: goodName, + Value: goodName, + } + return claim + }(), + }, } for name, scenario := range scenarios { @@ -321,6 +335,8 @@ func TestValidateDeviceTaint(t *testing.T) { func TestValidateDeviceTaintUpdate(t *testing.T) { name := "valid" validTaintRule := testDeviceTaintRule(name, validDeviceTaintRuleSpec) + invalidTaintEffectRule := validTaintRule.DeepCopy() + invalidTaintEffectRule.Spec.Taint.Effect = "some-other-effect" scenarios := map[string]struct { old *resourceapi.DeviceTaintRule @@ -339,6 +355,21 @@ func TestValidateDeviceTaintUpdate(t *testing.T) { return taintRule }, }, + "valid-existing-unknown-effect": { + old: invalidTaintEffectRule, + update: func(taintRule *resourceapi.DeviceTaintRule) *resourceapi.DeviceTaintRule { + taintRule.Labels = 
map[string]string{"a": "b"} + return taintRule + }, + }, + "invalid-new-unknown-effect": { + wantFailures: field.ErrorList{field.NotSupported(field.NewPath("spec", "taint", "effect"), resourceapi.DeviceTaintEffect("some-other-effect"), []resourceapi.DeviceTaintEffect{resourceapi.DeviceTaintEffectNoExecute, resourceapi.DeviceTaintEffectNoSchedule, resourceapi.DeviceTaintEffectNone})}.MarkCoveredByDeclarative(), + old: validTaintRule, + update: func(taintRule *resourceapi.DeviceTaintRule) *resourceapi.DeviceTaintRule { + taintRule.Spec.Taint.Effect = "some-other-effect" + return taintRule + }, + }, } for name, scenario := range scenarios { diff --git a/pkg/apis/resource/validation/validation_resourceclaim_test.go b/pkg/apis/resource/validation/validation_resourceclaim_test.go index 598cfd6f2b0..8f90f8b381a 100644 --- a/pkg/apis/resource/validation/validation_resourceclaim_test.go +++ b/pkg/apis/resource/validation/validation_resourceclaim_test.go @@ -793,7 +793,7 @@ func TestValidateClaim(t *testing.T) { field.Invalid(fldPath.Index(5).Child("key"), badName, "name part must consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character (e.g. 'MyName', or 'my.name', or '123-abc', regex used for validation is '([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]')").MarkCoveredByDeclarative(), field.Invalid(fldPath.Index(5).Child("value"), badName, "a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character (e.g. 
'MyValue', or 'my_value', or '12345', regex used for validation is '(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?')"), - field.NotSupported(fldPath.Index(5).Child("effect"), resource.DeviceTaintEffect("some-other-effect"), []resource.DeviceTaintEffect{resource.DeviceTaintEffectNoExecute, resource.DeviceTaintEffectNoSchedule}).MarkCoveredByDeclarative(), + field.NotSupported(fldPath.Index(5).Child("effect"), resource.DeviceTaintEffect("some-other-effect"), []resource.DeviceTaintEffect{resource.DeviceTaintEffectNoExecute, resource.DeviceTaintEffectNoSchedule, resource.DeviceTaintEffectNone}).MarkCoveredByDeclarative(), ) return allErrs }(), diff --git a/pkg/apis/resource/validation/validation_resourceslice_test.go b/pkg/apis/resource/validation/validation_resourceslice_test.go index 5bb9a316163..b14dc7f3298 100644 --- a/pkg/apis/resource/validation/validation_resourceslice_test.go +++ b/pkg/apis/resource/validation/validation_resourceslice_test.go @@ -108,6 +108,25 @@ func TestValidateResourceSlice(t *testing.T) { wantFailures: field.ErrorList{field.TooMany(field.NewPath("spec", "devices"), resourceapi.ResourceSliceMaxDevices+1, resourceapi.ResourceSliceMaxDevices)}, slice: testResourceSlice(goodName, goodName, goodName, resourceapi.ResourceSliceMaxDevices+1), }, + "good-taints": { + slice: func() *resourceapi.ResourceSlice { + slice := testResourceSlice(goodName, goodName, goodName, resourceapi.ResourceSliceMaxDevicesWithTaints) + for i := range slice.Spec.Devices { + slice.Spec.Devices[i].Taints = []resourceapi.DeviceTaint{{Key: "example.com/taint", Effect: resourceapi.DeviceTaintEffectNoExecute}} + } + return slice + }(), + }, + "too-large-taints": { + wantFailures: field.ErrorList{field.TooMany(field.NewPath("spec", "devices"), resourceapi.ResourceSliceMaxDevicesWithTaints+1, resourceapi.ResourceSliceMaxDevicesWithTaints)}, + slice: func() *resourceapi.ResourceSlice { + slice := testResourceSlice(goodName, goodName, goodName, 
resourceapi.ResourceSliceMaxDevicesWithTaints+1) + for i := range slice.Spec.Devices { + slice.Spec.Devices[i].Taints = []resourceapi.DeviceTaint{{Key: "example.com/taint", Effect: resourceapi.DeviceTaintEffectNoExecute}} + } + return slice + }(), + }, "missing-name": { wantFailures: field.ErrorList{field.Required(field.NewPath("metadata", "name"), "name or generateName is required")}, slice: testResourceSlice("", goodName, driverName, 1), @@ -497,7 +516,7 @@ func TestValidateResourceSlice(t *testing.T) { field.Invalid(fldPath.Index(3).Child("key"), badName, "name part must consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character (e.g. 'MyName', or 'my.name', or '123-abc', regex used for validation is '([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9]')"), field.Invalid(fldPath.Index(3).Child("value"), badName, "a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character (e.g. 'MyValue', or 'my_value', or '12345', regex used for validation is '(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])?')"), - field.NotSupported(fldPath.Index(3).Child("effect"), resourceapi.DeviceTaintEffect("some-other-op"), []resourceapi.DeviceTaintEffect{resourceapi.DeviceTaintEffectNoExecute, resourceapi.DeviceTaintEffectNoSchedule}).MarkCoveredByDeclarative(), + field.NotSupported(fldPath.Index(3).Child("effect"), resourceapi.DeviceTaintEffect("some-other-effect"), []resourceapi.DeviceTaintEffect{resourceapi.DeviceTaintEffectNoExecute, resourceapi.DeviceTaintEffectNoSchedule, resourceapi.DeviceTaintEffectNone}).MarkCoveredByDeclarative(), } }(), slice: func() *resourceapi.ResourceSlice { @@ -522,7 +541,7 @@ func TestValidateResourceSlice(t *testing.T) { // Invalid strings. 
Key: badName, Value: badName, - Effect: "some-other-op", + Effect: "some-other-effect", }, } return slice @@ -867,6 +886,17 @@ func TestValidateResourceSlice(t *testing.T) { func TestValidateResourceSliceUpdate(t *testing.T) { name := "valid" validResourceSlice := testResourceSlice(name, name, name, 1) + invalidResourceSliceWithTaints := validResourceSlice.DeepCopy() + invalidResourceSliceWithTaints.Spec.Devices[0].Taints = []resourceapi.DeviceTaint{ + { + Key: "unhealthy-power", + Effect: resourceapi.DeviceTaintEffectNoExecute, + }, + { + Key: "unhealthy-mem", + Effect: "some-other-effect", + }, + } scenarios := map[string]struct { oldResourceSlice *resourceapi.ResourceSlice @@ -938,6 +968,31 @@ func TestValidateResourceSliceUpdate(t *testing.T) { return slice }, }, + "invalid-new-effect-in-old-device": { + wantFailures: field.ErrorList{field.NotSupported(field.NewPath("spec", "devices").Index(0).Child("taints").Index(1).Child("effect"), resourceapi.DeviceTaintEffect("some-other-effect"), []resourceapi.DeviceTaintEffect{resourceapi.DeviceTaintEffectNoExecute, resourceapi.DeviceTaintEffectNoSchedule, resourceapi.DeviceTaintEffectNone})}.MarkCoveredByDeclarative(), + oldResourceSlice: validResourceSlice, + update: func(slice *resourceapi.ResourceSlice) *resourceapi.ResourceSlice { + slice.Spec.Devices[0].Taints = invalidResourceSliceWithTaints.Spec.Devices[0].Taints + return slice + }, + }, + "valid-old-effect": { + oldResourceSlice: invalidResourceSliceWithTaints, + update: func(slice *resourceapi.ResourceSlice) *resourceapi.ResourceSlice { + slice.Spec.Devices[0].Attributes["foo"] = resourceapi.DeviceAttribute{StringValue: ptr.To("bar")} + return slice + }, + }, + "invalid-new-effect-in-new-device": { + wantFailures: field.ErrorList{field.NotSupported(field.NewPath("spec", "devices").Index(1).Child("taints").Index(1).Child("effect"), resourceapi.DeviceTaintEffect("some-other-effect"), []resourceapi.DeviceTaintEffect{resourceapi.DeviceTaintEffectNoExecute, 
resourceapi.DeviceTaintEffectNoSchedule, resourceapi.DeviceTaintEffectNone})}.MarkCoveredByDeclarative(), + oldResourceSlice: invalidResourceSliceWithTaints, + update: func(slice *resourceapi.ResourceSlice) *resourceapi.ResourceSlice { + device := slice.Spec.Devices[0].DeepCopy() + device.Name += "-other" + slice.Spec.Devices = append(slice.Spec.Devices, *device) + return slice + }, + }, } for name, scenario := range scenarios { diff --git a/pkg/registry/resource/devicetaintrule/storage/storage.go b/pkg/registry/resource/devicetaintrule/storage/storage.go index 64ff2fb3972..9919426979e 100644 --- a/pkg/registry/resource/devicetaintrule/storage/storage.go +++ b/pkg/registry/resource/devicetaintrule/storage/storage.go @@ -17,14 +17,19 @@ limitations under the License. package storage import ( + "context" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apiserver/pkg/registry/generic" genericregistry "k8s.io/apiserver/pkg/registry/generic/registry" + "k8s.io/apiserver/pkg/registry/rest" "k8s.io/kubernetes/pkg/apis/resource" "k8s.io/kubernetes/pkg/printers" printersinternal "k8s.io/kubernetes/pkg/printers/internalversion" printerstorage "k8s.io/kubernetes/pkg/printers/storage" "k8s.io/kubernetes/pkg/registry/resource/devicetaintrule" + "sigs.k8s.io/structured-merge-diff/v6/fieldpath" ) // REST implements a RESTStorage for DeviceTaintRule. @@ -33,7 +38,7 @@ type REST struct { } // NewREST returns a RESTStorage object that will work against DeviceTaintRule. 
-func NewREST(optsGetter generic.RESTOptionsGetter) (*REST, error) { +func NewREST(optsGetter generic.RESTOptionsGetter) (*REST, *StatusREST, error) { store := &genericregistry.Store{ NewFunc: func() runtime.Object { return &resource.DeviceTaintRule{} }, NewListFunc: func() runtime.Object { return &resource.DeviceTaintRuleList{} }, @@ -44,13 +49,50 @@ func NewREST(optsGetter generic.RESTOptionsGetter) (*REST, error) { UpdateStrategy: devicetaintrule.Strategy, DeleteStrategy: devicetaintrule.Strategy, ReturnDeletedObject: true, + ResetFieldsStrategy: devicetaintrule.Strategy, TableConvertor: printerstorage.TableConvertor{TableGenerator: printers.NewTableGenerator().With(printersinternal.AddHandlers)}, } options := &generic.StoreOptions{RESTOptions: optsGetter} if err := store.CompleteWithOptions(options); err != nil { - return nil, err + return nil, nil, err } - return &REST{store}, nil + statusStore := *store + statusStore.UpdateStrategy = devicetaintrule.StatusStrategy + statusStore.ResetFieldsStrategy = devicetaintrule.StatusStrategy + + return &REST{store}, &StatusREST{store: &statusStore}, nil +} + +// StatusREST implements the REST endpoint for changing the status of a DeviceTaintRule. +type StatusREST struct { + store *genericregistry.Store +} + +// New creates a new DeviceTaintRule object. +func (r *StatusREST) New() runtime.Object { + return &resource.DeviceTaintRule{} +} + +func (r *StatusREST) Destroy() { + // Given that underlying store is shared with REST, + // we don't destroy it here explicitly. +} + +// Get retrieves the object from the storage. It is required to support Patch. +func (r *StatusREST) Get(ctx context.Context, name string, options *metav1.GetOptions) (runtime.Object, error) { + return r.store.Get(ctx, name, options) +} + +// Update alters the status subset of an object. 
+func (r *StatusREST) Update(ctx context.Context, name string, objInfo rest.UpdatedObjectInfo, createValidation rest.ValidateObjectFunc, updateValidation rest.ValidateObjectUpdateFunc, forceAllowCreate bool, options *metav1.UpdateOptions) (runtime.Object, bool, error) { + // We are explicitly setting forceAllowCreate to false in the call to the underlying storage because + // subresources should never allow create on update. + return r.store.Update(ctx, name, objInfo, createValidation, updateValidation, false, options) +} + +// GetResetFields implements rest.ResetFieldsStrategy +func (r *StatusREST) GetResetFields() map[fieldpath.APIVersion]*fieldpath.Set { + return r.store.GetResetFields() } diff --git a/pkg/registry/resource/devicetaintrule/storage/storage_test.go b/pkg/registry/resource/devicetaintrule/storage/storage_test.go index c45701a17cc..fa4a88d3434 100644 --- a/pkg/registry/resource/devicetaintrule/storage/storage_test.go +++ b/pkg/registry/resource/devicetaintrule/storage/storage_test.go @@ -20,19 +20,24 @@ import ( "testing" "time" + "github.com/google/go-cmp/cmp" + + apiequality "k8s.io/apimachinery/pkg/api/equality" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/runtime" + genericapirequest "k8s.io/apiserver/pkg/endpoints/request" "k8s.io/apiserver/pkg/registry/generic" genericregistrytest "k8s.io/apiserver/pkg/registry/generic/testing" + "k8s.io/apiserver/pkg/registry/rest" etcd3testing "k8s.io/apiserver/pkg/storage/etcd3/testing" "k8s.io/kubernetes/pkg/apis/resource" _ "k8s.io/kubernetes/pkg/apis/resource/install" "k8s.io/kubernetes/pkg/registry/registrytest" ) -func newStorage(t *testing.T) (*REST, *etcd3testing.EtcdTestServer) { +func newStorage(t *testing.T) (*REST, *StatusREST, *etcd3testing.EtcdTestServer) { etcdStorage, server := registrytest.NewEtcdStorageForResource(t, resource.Resource("devicetaintrules")) restOptions := generic.RESTOptions{ 
StorageConfig: etcdStorage, @@ -40,11 +45,11 @@ func newStorage(t *testing.T) (*REST, *etcd3testing.EtcdTestServer) { DeleteCollectionWorkers: 1, ResourcePrefix: "devicetaintrules", } - deviceTaintStorage, err := NewREST(restOptions) + deviceTaintStorage, statusStorage, err := NewREST(restOptions) if err != nil { t.Fatalf("unexpected error from REST storage: %v", err) } - return deviceTaintStorage, server + return deviceTaintStorage, statusStorage, server } func validNewDeviceTaint(name string) *resource.DeviceTaintRule { @@ -63,7 +68,7 @@ func validNewDeviceTaint(name string) *resource.DeviceTaintRule { } func TestCreate(t *testing.T) { - storage, server := newStorage(t) + storage, _, server := newStorage(t) defer server.Terminate(t) defer storage.Store.DestroyFunc() test := genericregistrytest.New(t, storage.Store).ClusterScope() @@ -80,7 +85,7 @@ func TestCreate(t *testing.T) { } func TestUpdate(t *testing.T) { - storage, server := newStorage(t) + storage, _, server := newStorage(t) defer server.Terminate(t) defer storage.Store.DestroyFunc() test := genericregistrytest.New(t, storage.Store).ClusterScope() @@ -98,7 +103,7 @@ func TestUpdate(t *testing.T) { } func TestDelete(t *testing.T) { - storage, server := newStorage(t) + storage, _, server := newStorage(t) defer server.Terminate(t) defer storage.Store.DestroyFunc() test := genericregistrytest.New(t, storage.Store).ClusterScope().ReturnDeletedObject() @@ -106,7 +111,7 @@ func TestDelete(t *testing.T) { } func TestGet(t *testing.T) { - storage, server := newStorage(t) + storage, _, server := newStorage(t) defer server.Terminate(t) defer storage.Store.DestroyFunc() test := genericregistrytest.New(t, storage.Store).ClusterScope() @@ -114,7 +119,7 @@ func TestGet(t *testing.T) { } func TestList(t *testing.T) { - storage, server := newStorage(t) + storage, _, server := newStorage(t) defer server.Terminate(t) defer storage.Store.DestroyFunc() test := genericregistrytest.New(t, storage.Store).ClusterScope() @@ 
-122,7 +127,7 @@ func TestList(t *testing.T) { } func TestWatch(t *testing.T) { - storage, server := newStorage(t) + storage, _, server := newStorage(t) defer server.Terminate(t) defer storage.Store.DestroyFunc() test := genericregistrytest.New(t, storage.Store).ClusterScope() @@ -144,3 +149,39 @@ func TestWatch(t *testing.T) { }, ) } + +func TestUpdateStatus(t *testing.T) { + storage, statusStorage, server := newStorage(t) + defer server.Terminate(t) + defer storage.Store.DestroyFunc() + ctx := genericapirequest.NewDefaultContext() + + key, _ := storage.KeyFunc(ctx, "foo") + deviceTaintStart := validNewDeviceTaint("foo") + err := storage.Storage.Create(ctx, key, deviceTaintStart, nil, 0, false) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + + deviceTaint := deviceTaintStart.DeepCopy() + deviceTaint.Status.Conditions = []metav1.Condition{{ + Type: "EvictionInProgress", + Status: metav1.ConditionTrue, + Reason: "PodsLeft", + Message: "100 pods left", + LastTransitionTime: metav1.Time{Time: time.Now().Truncate(time.Second)}, + }} + _, _, err = statusStorage.Update(ctx, deviceTaint.Name, rest.DefaultUpdatedObjectInfo(deviceTaint), rest.ValidateAllObjectFunc, rest.ValidateAllObjectUpdateFunc, false, &metav1.UpdateOptions{}) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + obj, err := storage.Get(ctx, "foo", &metav1.GetOptions{}) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + deviceTaintOut := obj.(*resource.DeviceTaintRule) + // only compare relevant changes b/c of difference in metadata + if !apiequality.Semantic.DeepEqual(deviceTaint.Status, deviceTaintOut.Status) { + t.Errorf("unexpected object: %s", cmp.Diff(deviceTaint.Status, deviceTaintOut.Status)) + } +} diff --git a/pkg/registry/resource/devicetaintrule/strategy.go b/pkg/registry/resource/devicetaintrule/strategy.go index d458fc7d538..dc8325ea051 100644 --- a/pkg/registry/resource/devicetaintrule/strategy.go +++ 
b/pkg/registry/resource/devicetaintrule/strategy.go @@ -20,12 +20,14 @@ import ( "context" apiequality "k8s.io/apimachinery/pkg/api/equality" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/util/validation/field" "k8s.io/apiserver/pkg/storage/names" "k8s.io/kubernetes/pkg/api/legacyscheme" "k8s.io/kubernetes/pkg/apis/resource" "k8s.io/kubernetes/pkg/apis/resource/validation" + "sigs.k8s.io/structured-merge-diff/v6/fieldpath" ) // deviceTaintRuleStrategy implements behavior for DeviceTaintRule objects @@ -34,51 +36,105 @@ type deviceTaintRuleStrategy struct { names.NameGenerator } -var Strategy = deviceTaintRuleStrategy{legacyscheme.Scheme, names.SimpleNameGenerator} +var ( + Strategy = &deviceTaintRuleStrategy{legacyscheme.Scheme, names.SimpleNameGenerator} + StatusStrategy = &deviceTaintRuleStatusStrategy{deviceTaintRuleStrategy: Strategy} +) func (deviceTaintRuleStrategy) NamespaceScoped() bool { return false } -func (deviceTaintRuleStrategy) PrepareForCreate(ctx context.Context, obj runtime.Object) { - patch := obj.(*resource.DeviceTaintRule) - patch.Generation = 1 +// GetResetFields returns the set of fields that get reset by the strategy and +// should not be modified by the user. For a new DeviceTaintRule that is the +// status. +func (*deviceTaintRuleStrategy) GetResetFields() map[fieldpath.APIVersion]*fieldpath.Set { + fields := map[fieldpath.APIVersion]*fieldpath.Set{ + "resource.k8s.io/v1alpha3": fieldpath.NewSet( + fieldpath.MakePathOrDie("status"), + ), + } + + return fields } -func (deviceTaintRuleStrategy) Validate(ctx context.Context, obj runtime.Object) field.ErrorList { - patch := obj.(*resource.DeviceTaintRule) - return validation.ValidateDeviceTaintRule(patch) +func (*deviceTaintRuleStrategy) PrepareForCreate(ctx context.Context, obj runtime.Object) { + rule := obj.(*resource.DeviceTaintRule) + // Status must not be set by user on create. 
+ rule.Status = resource.DeviceTaintRuleStatus{} + rule.Generation = 1 } -func (deviceTaintRuleStrategy) WarningsOnCreate(ctx context.Context, obj runtime.Object) []string { +func (*deviceTaintRuleStrategy) Validate(ctx context.Context, obj runtime.Object) field.ErrorList { + rule := obj.(*resource.DeviceTaintRule) + return validation.ValidateDeviceTaintRule(rule) +} + +func (*deviceTaintRuleStrategy) WarningsOnCreate(ctx context.Context, obj runtime.Object) []string { return nil } -func (deviceTaintRuleStrategy) Canonicalize(obj runtime.Object) { +func (*deviceTaintRuleStrategy) Canonicalize(obj runtime.Object) { } -func (deviceTaintRuleStrategy) AllowCreateOnUpdate() bool { +func (*deviceTaintRuleStrategy) AllowCreateOnUpdate() bool { return false } -func (deviceTaintRuleStrategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object) { - patch := obj.(*resource.DeviceTaintRule) - oldPatch := old.(*resource.DeviceTaintRule) +func (*deviceTaintRuleStrategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object) { + rule := obj.(*resource.DeviceTaintRule) + oldRule := old.(*resource.DeviceTaintRule) + rule.Status = oldRule.Status // Any changes to the spec increment the generation number. 
- if !apiequality.Semantic.DeepEqual(oldPatch.Spec, patch.Spec) { - patch.Generation = oldPatch.Generation + 1 + if !apiequality.Semantic.DeepEqual(oldRule.Spec, rule.Spec) { + rule.Generation = oldRule.Generation + 1 } } -func (deviceTaintRuleStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList { +func (*deviceTaintRuleStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList { return validation.ValidateDeviceTaintRuleUpdate(obj.(*resource.DeviceTaintRule), old.(*resource.DeviceTaintRule)) } -func (deviceTaintRuleStrategy) WarningsOnUpdate(ctx context.Context, obj, old runtime.Object) []string { +func (*deviceTaintRuleStrategy) WarningsOnUpdate(ctx context.Context, obj, old runtime.Object) []string { return nil } -func (deviceTaintRuleStrategy) AllowUnconditionalUpdate() bool { +func (*deviceTaintRuleStrategy) AllowUnconditionalUpdate() bool { return true } + +type deviceTaintRuleStatusStrategy struct { + *deviceTaintRuleStrategy +} + +// GetResetFields returns the set of fields that get reset by the strategy and +// should not be modified by the user. For a status update that is the spec. 
+func (*deviceTaintRuleStatusStrategy) GetResetFields() map[fieldpath.APIVersion]*fieldpath.Set { + fields := map[fieldpath.APIVersion]*fieldpath.Set{ + "resource.k8s.io/v1alpha3": fieldpath.NewSet( + fieldpath.MakePathOrDie("metadata"), + fieldpath.MakePathOrDie("spec"), + ), + } + + return fields +} + +func (*deviceTaintRuleStatusStrategy) PrepareForUpdate(ctx context.Context, obj, old runtime.Object) { + newRule := obj.(*resource.DeviceTaintRule) + oldRule := old.(*resource.DeviceTaintRule) + newRule.Spec = oldRule.Spec + metav1.ResetObjectMetaForStatus(&newRule.ObjectMeta, &oldRule.ObjectMeta) +} + +func (r *deviceTaintRuleStatusStrategy) ValidateUpdate(ctx context.Context, obj, old runtime.Object) field.ErrorList { + newRule := obj.(*resource.DeviceTaintRule) + oldRule := old.(*resource.DeviceTaintRule) + return validation.ValidateDeviceTaintRuleStatusUpdate(newRule, oldRule) +} + +// WarningsOnUpdate returns warnings for the given update. +func (*deviceTaintRuleStatusStrategy) WarningsOnUpdate(ctx context.Context, obj, old runtime.Object) []string { + return nil +} diff --git a/pkg/registry/resource/devicetaintrule/strategy_test.go b/pkg/registry/resource/devicetaintrule/strategy_test.go index 7923a8e981c..19993748c09 100644 --- a/pkg/registry/resource/devicetaintrule/strategy_test.go +++ b/pkg/registry/resource/devicetaintrule/strategy_test.go @@ -19,14 +19,17 @@ package devicetaintrule import ( "testing" + "github.com/stretchr/testify/assert" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" genericapirequest "k8s.io/apiserver/pkg/endpoints/request" "k8s.io/kubernetes/pkg/apis/resource" ) -var patch = &resource.DeviceTaintRule{ +var obj = &resource.DeviceTaintRule{ ObjectMeta: metav1.ObjectMeta{ - Name: "valid-patch", + Name: "valid-patch", + Generation: 1, }, Spec: resource.DeviceTaintRuleSpec{ Taint: resource.DeviceTaint{ @@ -36,6 +39,31 @@ var patch = &resource.DeviceTaintRule{ }, } +var objWithStatus = &resource.DeviceTaintRule{ + ObjectMeta: 
metav1.ObjectMeta{ + Name: "valid-patch", + Generation: 1, + }, + Spec: resource.DeviceTaintRuleSpec{ + Taint: resource.DeviceTaint{ + Key: "example.com/tainted", + Effect: resource.DeviceTaintEffectNoExecute, + }, + }, + Status: resource.DeviceTaintRuleStatus{ + Conditions: []metav1.Condition{{ + Type: "foo", + Status: metav1.ConditionFalse, + LastTransitionTime: metav1.Now(), + Reason: "something", + Message: "else", + }}, + }, +} + +var fieldImmutableError = "field is immutable" +var metadataError = "a lowercase RFC 1123 subdomain must consist of lower case alphanumeric characters" + func TestDeviceTaintRuleStrategy(t *testing.T) { if Strategy.NamespaceScoped() { t.Errorf("DeviceTaintRule must not be namespace scoped") @@ -47,40 +75,202 @@ func TestDeviceTaintRuleStrategy(t *testing.T) { func TestDeviceTaintRuleStrategyCreate(t *testing.T) { ctx := genericapirequest.NewDefaultContext() - patch := patch.DeepCopy() + testcases := map[string]struct { + obj *resource.DeviceTaintRule + expectValidationError string + expectObj *resource.DeviceTaintRule + }{ + "simple": { + obj: obj, + expectObj: obj, + }, + "validation-error": { + obj: func() *resource.DeviceTaintRule { + obj := obj.DeepCopy() + obj.Name = "%#@$%$" + return obj + }(), + expectValidationError: metadataError, + }, + "drop-status": { + obj: objWithStatus, + expectObj: obj, + }, + "set-generation": { + obj: func() *resource.DeviceTaintRule { + obj := obj.DeepCopy() + obj.Generation = 42 // Cannot be set by client on create, overwritten with 1. 
+ return obj + }(), + expectObj: obj, + }, + } - Strategy.PrepareForCreate(ctx, patch) - errs := Strategy.Validate(ctx, patch) - if len(errs) != 0 { - t.Errorf("unexpected error validating for create %v", errs) + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + obj := tc.obj.DeepCopy() + Strategy.PrepareForCreate(ctx, obj) + if errs := Strategy.Validate(ctx, obj); len(errs) != 0 { + if tc.expectValidationError == "" { + t.Fatalf("unexpected error(s): %v", errs) + } + assert.ErrorContains(t, errs[0], tc.expectValidationError, "the error message should have contained the expected error message") + return + } + if tc.expectValidationError != "" { + t.Fatal("expected validation error(s), got none") + } + if warnings := Strategy.WarningsOnCreate(ctx, obj); len(warnings) != 0 { + t.Fatalf("unexpected warnings: %q", warnings) + } + Strategy.Canonicalize(obj) + assert.Equal(t, tc.expectObj, obj) + }) } } func TestDeviceTaintRuleStrategyUpdate(t *testing.T) { - t.Run("no-changes-okay", func(t *testing.T) { - ctx := genericapirequest.NewDefaultContext() - patch := patch.DeepCopy() - newPatch := patch.DeepCopy() - newPatch.ResourceVersion = "4" + ctx := genericapirequest.NewDefaultContext() - Strategy.PrepareForUpdate(ctx, newPatch, patch) - errs := Strategy.ValidateUpdate(ctx, newPatch, patch) - if len(errs) != 0 { - t.Errorf("unexpected validation errors: %v", errs) - } - }) + testcases := map[string]struct { + oldObj *resource.DeviceTaintRule + newObj *resource.DeviceTaintRule + expectValidationError string + expectObj *resource.DeviceTaintRule + }{ + "no-changes-okay": { + oldObj: obj, + newObj: obj, + expectObj: obj, + }, + "name-change-not-allowed": { + oldObj: obj, + newObj: func() *resource.DeviceTaintRule { + obj := obj.DeepCopy() + obj.Name += "-2" + return obj + }(), + expectValidationError: fieldImmutableError, + }, + "drop-status": { + oldObj: obj, + newObj: objWithStatus, + expectObj: obj, + }, + "bump-generation": { + oldObj: obj, + 
newObj: func() *resource.DeviceTaintRule { + obj := obj.DeepCopy() + obj.Spec.Taint.Effect = resource.DeviceTaintEffectNone + return obj + }(), + expectObj: func() *resource.DeviceTaintRule { + obj := obj.DeepCopy() + obj.Spec.Taint.Effect = resource.DeviceTaintEffectNone + obj.Generation++ + return obj + }(), + }, + } - t.Run("name-change-not-allowed", func(t *testing.T) { - ctx := genericapirequest.NewDefaultContext() - patch := patch.DeepCopy() - newPatch := patch.DeepCopy() - newPatch.Name = "valid-patch-2" - newPatch.ResourceVersion = "4" + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + oldObj := tc.oldObj.DeepCopy() + newObj := tc.newObj.DeepCopy() + newObj.ResourceVersion = "4" - Strategy.PrepareForUpdate(ctx, newPatch, patch) - errs := Strategy.ValidateUpdate(ctx, newPatch, patch) - if len(errs) == 0 { - t.Errorf("expected a validation error") - } - }) + Strategy.PrepareForUpdate(ctx, newObj, oldObj) + if errs := Strategy.ValidateUpdate(ctx, newObj, oldObj); len(errs) != 0 { + if tc.expectValidationError == "" { + t.Fatalf("unexpected error(s): %v", errs) + } + assert.ErrorContains(t, errs[0], tc.expectValidationError, "the error message should have contained the expected error message") + return + } + if tc.expectValidationError != "" { + t.Fatal("expected validation error(s), got none") + } + if warnings := Strategy.WarningsOnUpdate(ctx, newObj, oldObj); len(warnings) != 0 { + t.Fatalf("unexpected warnings: %q", warnings) + } + Strategy.Canonicalize(newObj) + expectObj := tc.expectObj.DeepCopy() + expectObj.ResourceVersion = "4" + assert.Equal(t, expectObj, newObj) + }) + } +} + +func TestStatusStrategyUpdate(t *testing.T) { + ctx := genericapirequest.NewDefaultContext() + testcases := map[string]struct { + oldObj *resource.DeviceTaintRule + newObj *resource.DeviceTaintRule + expectValidationError string + expectObj *resource.DeviceTaintRule + }{ + "no-changes-okay": { + oldObj: obj, + newObj: obj, + expectObj: obj, + }, + 
"name-change-not-allowed": { + oldObj: obj, + newObj: func() *resource.DeviceTaintRule { + obj := obj.DeepCopy() + obj.Name += "-2" + return obj + }(), + expectValidationError: fieldImmutableError, + }, + // Cannot add finalizers, annotations and labels during status update. + "drop-meta-changes": { + oldObj: obj, + newObj: func() *resource.DeviceTaintRule { + obj := obj.DeepCopy() + obj.Finalizers = []string{"foo"} + obj.Annotations = map[string]string{"foo": "bar"} + obj.Labels = map[string]string{"foo": "bar"} + return obj + }(), + expectObj: obj, + }, + "drop-spec": { + oldObj: obj, + newObj: func() *resource.DeviceTaintRule { + obj := obj.DeepCopy() + obj.Spec.Taint.Effect = resource.DeviceTaintEffectNone + return obj + }(), + expectObj: obj, + }, + } + + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + oldObj := tc.oldObj.DeepCopy() + newObj := tc.newObj.DeepCopy() + newObj.ResourceVersion = "4" + + StatusStrategy.PrepareForUpdate(ctx, newObj, oldObj) + if errs := StatusStrategy.ValidateUpdate(ctx, newObj, oldObj); len(errs) != 0 { + if tc.expectValidationError == "" { + t.Fatalf("unexpected error(s): %v", errs) + } + assert.ErrorContains(t, errs[0], tc.expectValidationError, "the error message should have contained the expected error message") + return + } + if tc.expectValidationError != "" { + t.Fatal("expected validation error(s), got none") + } + if warnings := StatusStrategy.WarningsOnUpdate(ctx, newObj, oldObj); len(warnings) != 0 { + t.Fatalf("unexpected warnings: %q", warnings) + } + StatusStrategy.Canonicalize(newObj) + + expectObj := tc.expectObj.DeepCopy() + expectObj.ResourceVersion = "4" + assert.Equal(t, expectObj, newObj) + }) + } } diff --git a/pkg/registry/resource/resourceclaim/strategy.go b/pkg/registry/resource/resourceclaim/strategy.go index 5dd6db1ba34..ca6d25237fc 100644 --- a/pkg/registry/resource/resourceclaim/strategy.go +++ b/pkg/registry/resource/resourceclaim/strategy.go @@ -153,15 +153,19 @@ func 
NewStatusStrategy(resourceclaimStrategy *resourceclaimStrategy) *resourcecl func (*resourceclaimStatusStrategy) GetResetFields() map[fieldpath.APIVersion]*fieldpath.Set { fields := map[fieldpath.APIVersion]*fieldpath.Set{ "resource.k8s.io/v1alpha3": fieldpath.NewSet( + fieldpath.MakePathOrDie("metadata"), fieldpath.MakePathOrDie("spec"), ), "resource.k8s.io/v1beta1": fieldpath.NewSet( + fieldpath.MakePathOrDie("metadata"), fieldpath.MakePathOrDie("spec"), ), "resource.k8s.io/v1beta2": fieldpath.NewSet( + fieldpath.MakePathOrDie("metadata"), fieldpath.MakePathOrDie("spec"), ), "resource.k8s.io/v1": fieldpath.NewSet( + fieldpath.MakePathOrDie("metadata"), fieldpath.MakePathOrDie("spec"), ), } diff --git a/pkg/registry/resource/rest/storage_resource.go b/pkg/registry/resource/rest/storage_resource.go index 27660fea478..f960b6a6462 100644 --- a/pkg/registry/resource/rest/storage_resource.go +++ b/pkg/registry/resource/rest/storage_resource.go @@ -118,11 +118,12 @@ func (p RESTStorageProvider) v1alpha3Storage(apiResourceConfigSource serverstora storage := map[string]rest.Storage{} if resource := "devicetaintrules"; apiResourceConfigSource.ResourceEnabled(resourcev1alpha3.SchemeGroupVersion.WithResource(resource)) { - deviceTaintStorage, err := devicetaintrulestore.NewREST(restOptionsGetter) + deviceTaintStorage, deviceTaintStatusStorage, err := devicetaintrulestore.NewREST(restOptionsGetter) if err != nil { return nil, err } storage[resource] = deviceTaintStorage + storage[resource+"/status"] = deviceTaintStatusStorage } return storage, nil diff --git a/staging/src/k8s.io/api/resource/v1/types.go b/staging/src/k8s.io/api/resource/v1/types.go index f698be61969..15b897e5c0d 100644 --- a/staging/src/k8s.io/api/resource/v1/types.go +++ b/staging/src/k8s.io/api/resource/v1/types.go @@ -149,7 +149,7 @@ type ResourceSliceSpec struct { // Devices lists some or all of the devices in this pool. // - // Must not have more than 128 entries. + // Must not have more than 128 entries. 
If any device uses taints the limit is 64. // // +optional // +listType=atomic @@ -250,6 +250,7 @@ type ResourcePool struct { const ResourceSliceMaxSharedCapacity = 128 const ResourceSliceMaxDevices = 128 +const ResourceSliceMaxDevicesWithTaints = 64 const PoolNameMaxLength = validation.DNS1123SubdomainMaxLength // Same as for a single node name. const BindingConditionsMaxSize = 4 const BindingFailureConditionsMaxSize = 4 @@ -333,7 +334,9 @@ type Device struct { // If specified, these are the driver-defined taints. // - // The maximum number of taints is 4. + // The maximum number of taints is 16. If taints are set for + // any device in a ResourceSlice, then the maximum number of + // allowed devices per ResourceSlice is 64 instead of 128. // // This is an alpha field and requires enabling the DRADeviceTaints // feature gate. @@ -618,8 +621,8 @@ type DeviceAttribute struct { // DeviceAttributeMaxValueLength is the maximum length of a string or version attribute value. const DeviceAttributeMaxValueLength = 64 -// DeviceTaintsMaxLength is the maximum number of taints per device. -const DeviceTaintsMaxLength = 4 +// DeviceTaintsMaxLength is the maximum number of taints per Device. +const DeviceTaintsMaxLength = 16 // The device this taint is attached to has the "effect" on // any claim which does not tolerate the taint and, through the claim, @@ -641,8 +644,10 @@ type DeviceTaint struct { // The effect of the taint on claims that do not tolerate the taint // and through such claims on the pods using them. - // Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for - // nodes is not valid here. + // + // Valid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for + // nodes is not valid here. More effects may get added in the future. + // Consumers must treat unknown effects like None. 
// // +required // +k8s:required @@ -652,6 +657,14 @@ type DeviceTaint struct { // // Implementing PreferNoSchedule would depend on a scoring solution for DRA. // It might get added as part of that. + // + // A possible future new effect is NoExecuteWithPodDisruptionBudget: + // honor the pod disruption budget instead of simply deleting pods. + // This is currently undecided, it could also be a separate field. + // + // Validation must be prepared to allow unknown enums in stored objects, + // which will enable adding new enums within a single release without + // ratcheting. // TimeAdded represents the time at which the taint was added. // Added automatically during create or update if not set. @@ -671,6 +684,9 @@ type DeviceTaint struct { type DeviceTaintEffect string const ( + // No effect, the taint is purely informational. + DeviceTaintEffectNone DeviceTaintEffect = "None" + // Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, // but allow all pods submitted to Kubelet without going through the scheduler // to start, and allow all already-running pods to continue running. diff --git a/staging/src/k8s.io/api/resource/v1alpha3/types.go b/staging/src/k8s.io/api/resource/v1alpha3/types.go index da9a9ca286b..3ea3e7bafd5 100644 --- a/staging/src/k8s.io/api/resource/v1alpha3/types.go +++ b/staging/src/k8s.io/api/resource/v1alpha3/types.go @@ -134,8 +134,10 @@ type DeviceTaint struct { // The effect of the taint on claims that do not tolerate the taint // and through such claims on the pods using them. - // Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for - // nodes is not valid here. + // + // Valid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for + // nodes is not valid here. More effects may get added in the future. + // Consumers must treat unknown effects like None. 
// // +required Effect DeviceTaintEffect `json:"effect" protobuf:"bytes,3,name=effect,casttype=DeviceTaintEffect"` @@ -144,6 +146,14 @@ type DeviceTaint struct { // // Implementing PreferNoSchedule would depend on a scoring solution for DRA. // It might get added as part of that. + // + // A possible future new effect is NoExecuteWithPodDisruptionBudget: + // honor the pod disruption budget instead of simply deleting pods. + // This is currently undecided, it could also be a separate field. + // + // Validation must be prepared to allow unknown enums in stored objects, + // which will enable adding new enums within a single release without + // ratcheting. // TimeAdded represents the time at which the taint was added. // Added automatically during create or update if not set. @@ -162,6 +172,9 @@ type DeviceTaint struct { type DeviceTaintEffect string const ( + // No effect, the taint is purely informational. + DeviceTaintEffectNone DeviceTaintEffect = "None" + // Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, // but allow all pods submitted to Kubelet without going through the scheduler // to start, and allow all already-running pods to continue running. @@ -190,18 +203,16 @@ type DeviceTaintRule struct { // Changing the spec automatically increments the metadata.generation number. Spec DeviceTaintRuleSpec `json:"spec" protobuf:"bytes,2,name=spec"` - // ^^^ - // A spec gets added because adding a status seems likely. - // Such a status could provide feedback on applying the - // eviction and/or statistics (number of matching devices, - // affected allocated claims, pods remaining to be evicted, - // etc.). + // Status provides information about what was requested in the spec. + // + // +optional + Status DeviceTaintRuleStatus `json:"status,omitempty" protobuf:"bytes,3,opt,name=status"` } // DeviceTaintRuleSpec specifies the selector and one taint. 
type DeviceTaintRuleSpec struct { // DeviceSelector defines which device(s) the taint is applied to. - // All selector criteria must be satified for a device to + // All selector criteria must be satisfied for a device to // match. The empty selector matches all devices. Without // a selector, no devices are matches. // @@ -261,6 +272,43 @@ type DeviceTaintSelector struct { Selectors []DeviceSelector `json:"selectors,omitempty" protobuf:"bytes,5,rep,name=selectors"` } +// DeviceTaintRuleStatus provides information about an on-going pod eviction. +type DeviceTaintRuleStatus struct { + // Conditions provide information about the state of the DeviceTaintRule + // and the cluster at some point in time, + // in a machine-readable and human-readable format. + // + // The following condition is currently defined as part of this API, more may + // get added: + // - Type: EvictionInProgress + // - Status: True if there are currently pods which need to be evicted, False otherwise + // (includes the effects which don't cause eviction). + // - Reason: not specified, may change + // - Message: includes information about number of pending pods and already evicted pods + // in a human-readable format, updated periodically, may change + // + // For `effect: None`, the condition above gets set once for each change to + // the spec, with the message containing information about what would happen + // if the effect was `NoExecute`. This feedback can be used to decide whether + // changing the effect to `NoExecute` will work as intended. It only gets + // set once to avoid having to constantly update the status. + // + // Must have 8 or fewer entries. 
+ // + // +optional + // +listType=map + // +listMapKey=type + // +patchStrategy=merge + // +patchMergeKey=type + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type" protobuf:"bytes,1,rep,name=conditions"` +} + +// DeviceTaintRuleStatusMaxConditions is the maximum number of conditions in DeviceTaintRuleStatus. +const DeviceTaintRuleStatusMaxConditions = 8 + +// DeviceTaintConditionEvictionInProgress is the publicly documented condition type for the DeviceTaintRuleStatus. +const DeviceTaintConditionEvictionInProgress = "EvictionInProgress" + // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object // +k8s:prerelease-lifecycle-gen:introduced=1.33 diff --git a/staging/src/k8s.io/api/resource/v1beta1/types.go b/staging/src/k8s.io/api/resource/v1beta1/types.go index e63de0d1bcc..358923fb3be 100644 --- a/staging/src/k8s.io/api/resource/v1beta1/types.go +++ b/staging/src/k8s.io/api/resource/v1beta1/types.go @@ -149,7 +149,7 @@ type ResourceSliceSpec struct { // Devices lists some or all of the devices in this pool. // - // Must not have more than 128 entries. + // Must not have more than 128 entries. If any device uses taints the limit is 64. // // +optional // +listType=atomic @@ -258,6 +258,7 @@ type ResourcePool struct { const ResourceSliceMaxSharedCapacity = 128 const ResourceSliceMaxDevices = 128 +const ResourceSliceMaxDevicesWithTaints = 64 const PoolNameMaxLength = validation.DNS1123SubdomainMaxLength // Same as for a single node name. const BindingConditionsMaxSize = 4 const BindingFailureConditionsMaxSize = 4 @@ -345,7 +346,9 @@ type BasicDevice struct { // If specified, these are the driver-defined taints. // - // The maximum number of taints is 4. + // The maximum number of taints is 16. If taints are set for + // any device in a ResourceSlice, then the maximum number of + // allowed devices per ResourceSlice is 64 instead of 128. 
// // This is an alpha field and requires enabling the DRADeviceTaints // feature gate. @@ -622,8 +625,8 @@ type DeviceAttribute struct { // DeviceAttributeMaxValueLength is the maximum length of a string or version attribute value. const DeviceAttributeMaxValueLength = 64 -// DeviceTaintsMaxLength is the maximum number of taints per device. -const DeviceTaintsMaxLength = 4 +// DeviceTaintsMaxLength is the maximum number of taints per Device. +const DeviceTaintsMaxLength = 16 // The device this taint is attached to has the "effect" on // any claim which does not tolerate the taint and, through the claim, @@ -645,8 +648,10 @@ type DeviceTaint struct { // The effect of the taint on claims that do not tolerate the taint // and through such claims on the pods using them. - // Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for - // nodes is not valid here. + // + // Valid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for + // nodes is not valid here. More effects may get added in the future. + // Consumers must treat unknown effects like None. // // +required // +k8s:required @@ -656,6 +661,14 @@ type DeviceTaint struct { // // Implementing PreferNoSchedule would depend on a scoring solution for DRA. // It might get added as part of that. + // + // A possible future new effect is NoExecuteWithPodDisruptionBudget: + // honor the pod disruption budget instead of simply deleting pods. + // This is currently undecided, it could also be a separate field. + // + // Validation must be prepared to allow unknown enums in stored objects, + // which will enable adding new enums within a single release without + // ratcheting. // TimeAdded represents the time at which the taint was added. // Added automatically during create or update if not set. @@ -675,6 +688,9 @@ type DeviceTaint struct { type DeviceTaintEffect string const ( + // No effect, the taint is purely informational. 
+ DeviceTaintEffectNone DeviceTaintEffect = "None" + // Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, // but allow all pods submitted to Kubelet without going through the scheduler // to start, and allow all already-running pods to continue running. diff --git a/staging/src/k8s.io/api/resource/v1beta2/types.go b/staging/src/k8s.io/api/resource/v1beta2/types.go index c3e06f3edb5..65934ced465 100644 --- a/staging/src/k8s.io/api/resource/v1beta2/types.go +++ b/staging/src/k8s.io/api/resource/v1beta2/types.go @@ -149,7 +149,7 @@ type ResourceSliceSpec struct { // Devices lists some or all of the devices in this pool. // - // Must not have more than 128 entries. + // Must not have more than 128 entries. If any device uses taints the limit is 64. // // +optional // +listType=atomic @@ -250,6 +250,7 @@ type ResourcePool struct { const ResourceSliceMaxSharedCapacity = 128 const ResourceSliceMaxDevices = 128 +const ResourceSliceMaxDevicesWithTaints = 64 const PoolNameMaxLength = validation.DNS1123SubdomainMaxLength // Same as for a single node name. const BindingConditionsMaxSize = 4 const BindingFailureConditionsMaxSize = 4 @@ -333,7 +334,9 @@ type Device struct { // If specified, these are the driver-defined taints. // - // The maximum number of taints is 4. + // The maximum number of taints is 16. If taints are set for + // any device in a ResourceSlice, then the maximum number of + // allowed devices per ResourceSlice is 64 instead of 128. // // This is an alpha field and requires enabling the DRADeviceTaints // feature gate. @@ -618,8 +621,8 @@ type DeviceAttribute struct { // DeviceAttributeMaxValueLength is the maximum length of a string or version attribute value. const DeviceAttributeMaxValueLength = 64 -// DeviceTaintsMaxLength is the maximum number of taints per device. -const DeviceTaintsMaxLength = 4 +// DeviceTaintsMaxLength is the maximum number of taints per Device. 
+const DeviceTaintsMaxLength = 16 // The device this taint is attached to has the "effect" on // any claim which does not tolerate the taint and, through the claim, @@ -641,8 +644,10 @@ type DeviceTaint struct { // The effect of the taint on claims that do not tolerate the taint // and through such claims on the pods using them. - // Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for - // nodes is not valid here. + // + // Valid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for + // nodes is not valid here. More effects may get added in the future. + // Consumers must treat unknown effects like None. // // +required // +k8s:required @@ -652,6 +657,14 @@ type DeviceTaint struct { // // Implementing PreferNoSchedule would depend on a scoring solution for DRA. // It might get added as part of that. + // + // A possible future new effect is NoExecuteWithPodDisruptionBudget: + // honor the pod disruption budget instead of simply deleting pods. + // This is currently undecided, it could also be a separate field. + // + // Validation must be prepared to allow unknown enums in stored objects, + // which will enable adding new enums within a single release without + // ratcheting. // TimeAdded represents the time at which the taint was added. // Added automatically during create or update if not set. @@ -671,6 +684,9 @@ type DeviceTaint struct { type DeviceTaintEffect string const ( + // No effect, the taint is purely informational. + DeviceTaintEffectNone DeviceTaintEffect = "None" + // Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, // but allow all pods submitted to Kubelet without going through the scheduler // to start, and allow all already-running pods to continue running. 
diff --git a/staging/src/k8s.io/client-go/applyconfigurations/utils.go b/staging/src/k8s.io/client-go/applyconfigurations/utils.go index aafb1ac1946..5c3df50c7c4 100644 --- a/staging/src/k8s.io/client-go/applyconfigurations/utils.go +++ b/staging/src/k8s.io/client-go/applyconfigurations/utils.go @@ -1720,6 +1720,8 @@ func ForKind(kind schema.GroupVersionKind) interface{} { return &resourcev1alpha3.DeviceTaintRuleApplyConfiguration{} case v1alpha3.SchemeGroupVersion.WithKind("DeviceTaintRuleSpec"): return &resourcev1alpha3.DeviceTaintRuleSpecApplyConfiguration{} + case v1alpha3.SchemeGroupVersion.WithKind("DeviceTaintRuleStatus"): + return &resourcev1alpha3.DeviceTaintRuleStatusApplyConfiguration{} case v1alpha3.SchemeGroupVersion.WithKind("DeviceTaintSelector"): return &resourcev1alpha3.DeviceTaintSelectorApplyConfiguration{} diff --git a/test/integration/apiserver/apply/status_test.go b/test/integration/apiserver/apply/status_test.go index 15aee66dafa..ba0e11337c9 100644 --- a/test/integration/apiserver/apply/status_test.go +++ b/test/integration/apiserver/apply/status_test.go @@ -58,6 +58,7 @@ var statusData = map[schema.GroupVersionResource]string{ gvr("storage.k8s.io", "v1", "volumeattachments"): `{"status": {"attached": true}}`, gvr("policy", "v1", "poddisruptionbudgets"): `{"status": {"currentHealthy": 5}}`, gvr("policy", "v1beta1", "poddisruptionbudgets"): `{"status": {"currentHealthy": 5}}`, + gvr("resource.k8s.io", "v1alpha3", "devicetaintrules"): `{"status": {"conditions": [{"type": "EvictionInProgress", "status": "True", "reason": "PodsLeft", "message": "100 pods left", "lastTransitionTime": "2020-01-01T00:00:00Z"}]}}`, gvr("resource.k8s.io", "v1beta1", "resourceclaims"): `{"status": {"allocation": {"nodeSelector": {"nodeSelectorTerms": [{"matchExpressions": [{"key": "some-label", "operator": "In", "values": ["some-value"]}] }]}}}}`, gvr("resource.k8s.io", "v1beta2", "resourceclaims"): `{"status": {"allocation": {"nodeSelector": {"nodeSelectorTerms": 
[{"matchExpressions": [{"key": "some-label", "operator": "In", "values": ["some-value"]}] }]}}}}`, gvr("resource.k8s.io", "v1", "resourceclaims"): `{"status": {"allocation": {"nodeSelector": {"nodeSelectorTerms": [{"matchExpressions": [{"key": "some-label", "operator": "In", "values": ["some-value"]}] }]}}}}`, diff --git a/test/integration/dra/dra_test.go b/test/integration/dra/dra_test.go index b788332e73a..4f13474019b 100644 --- a/test/integration/dra/dra_test.go +++ b/test/integration/dra/dra_test.go @@ -1142,25 +1142,28 @@ func testResourceClaimDeviceStatus(tCtx ktesting.TContext, enabled bool) { require.Equal(tCtx, deviceStatus, claim.Status.Devices, "after removing device status three") } -// testMaxResourceSlice creates a ResourceSlice that is as large as possible +// testMaxResourceSlice creates ResourceSlices that are as large as possible // and prints some information about it. func testMaxResourceSlice(tCtx ktesting.TContext) { - slice := NewMaxResourceSlice() - createdSlice := createSlice(tCtx, slice) - totalSize := createdSlice.Size() - var managedFieldsSize int - for _, f := range createdSlice.ManagedFields { - managedFieldsSize += f.Size() - } - specSize := createdSlice.Spec.Size() - tCtx.Logf("\n\nTotal size: %s\nManagedFields size: %s (%.0f%%)\nSpec size: %s (%.0f)%%\n\nManagedFields:\n%s", - resource.NewQuantity(int64(totalSize), resource.BinarySI), - resource.NewQuantity(int64(managedFieldsSize), resource.BinarySI), float64(managedFieldsSize)*100/float64(totalSize), - resource.NewQuantity(int64(specSize), resource.BinarySI), float64(specSize)*100/float64(totalSize), - klog.Format(createdSlice.ManagedFields), - ) - if diff := cmp.Diff(slice.Spec, createdSlice.Spec); diff != "" { - tCtx.Errorf("ResourceSliceSpec got modified during Create (- want, + got):\n%s", diff) + for name, slice := range NewMaxResourceSlices() { + tCtx.Run(name, func(tCtx ktesting.TContext) { + createdSlice := createSlice(tCtx, slice) + totalSize := createdSlice.Size() + var 
managedFieldsSize int + for _, f := range createdSlice.ManagedFields { + managedFieldsSize += f.Size() + } + specSize := createdSlice.Spec.Size() + tCtx.Logf("\n\nTotal size: %s\nManagedFields size: %s (%.0f%%)\nSpec size: %s (%.0f)%%\n\nManagedFields:\n%s", + resource.NewQuantity(int64(totalSize), resource.BinarySI), + resource.NewQuantity(int64(managedFieldsSize), resource.BinarySI), float64(managedFieldsSize)*100/float64(totalSize), + resource.NewQuantity(int64(specSize), resource.BinarySI), float64(specSize)*100/float64(totalSize), + klog.Format(createdSlice.ManagedFields), + ) + if diff := cmp.Diff(slice.Spec, createdSlice.Spec); diff != "" { + tCtx.Errorf("ResourceSliceSpec got modified during Create (- want, + got):\n%s", diff) + } + }) } } diff --git a/test/integration/dra/objects.go b/test/integration/dra/objects.go index aef0001e14c..63de5aa5c20 100644 --- a/test/integration/dra/objects.go +++ b/test/integration/dra/objects.go @@ -29,8 +29,33 @@ import ( "k8s.io/utils/ptr" ) -// NewMaxResourceSlice creates a slice that is as large as possible given the current validation constraints. -func NewMaxResourceSlice() *resourceapi.ResourceSlice { +// NewMaxResourceSlices creates slices that are as large as possible given the current validation constraints. 
+func NewMaxResourceSlices() map[string]*resourceapi.ResourceSlice { + slices := map[string]*resourceapi.ResourceSlice{ + "basic": newBasicResourceSlice(resourceapi.ResourceSliceMaxDevices), + "with-taints": newTaintedResourceSlice(), + } + return slices +} + +func newTaintedResourceSlice() *resourceapi.ResourceSlice { + slice := newBasicResourceSlice(resourceapi.ResourceSliceMaxDevicesWithTaints) + for i := range slice.Spec.Devices { + for j := 0; j < resourceapi.DeviceTaintsMaxLength; j++ { + slice.Spec.Devices[i].Taints = append(slice.Spec.Devices[i].Taints, + resourceapi.DeviceTaint{ + Key: maxLabelName(i), + Value: maxLabelValue(i), + Effect: resourceapi.DeviceTaintEffectNoSchedule, + TimeAdded: &metav1.Time{Time: time.Now().Truncate(time.Second)}, + }, + ) + } + } + return slice +} + +func newBasicResourceSlice(numDevices int) *resourceapi.ResourceSlice { slice := &resourceapi.ResourceSlice{ ObjectMeta: metav1.ObjectMeta{ Name: maxSubDomain(0), @@ -70,7 +95,7 @@ func NewMaxResourceSlice() *resourceapi.ResourceSlice { }(), Devices: func() []resourceapi.Device { var devices []resourceapi.Device - for i := 0; i < resourceapi.ResourceSliceMaxDevices; i++ { + for i := 0; i < numDevices; i++ { devices = append(devices, resourceapi.Device{ Name: maxDNSLabel(i), // Use attributes rather than capacity since it is more expensive. 
@@ -98,18 +123,6 @@ func NewMaxResourceSlice() *resourceapi.ResourceSlice { return consumesCounters }(), NodeName: ptr.To(maxSubDomain(0)), - Taints: func() []resourceapi.DeviceTaint { - var taints []resourceapi.DeviceTaint - for i := 0; i < resourceapi.DeviceTaintsMaxLength; i++ { - taints = append(taints, resourceapi.DeviceTaint{ - Key: maxLabelName(i), - Value: maxLabelValue(i), - Effect: resourceapi.DeviceTaintEffectNoSchedule, - TimeAdded: &metav1.Time{Time: time.Now().Truncate(time.Second)}, - }) - } - return taints - }(), }) } return devices From e4dda7b282e7bb781e43d17fb9e690d5bab39570 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 23 Oct 2025 18:05:38 +0200 Subject: [PATCH 03/11] DRA device taints: fix DeviceTaintRule + missing slice case When the ResourceSlice no longer exists, the ResourceSlice tracker didn't and couldn't report the tainted devices even if they are allocated and in use. The controller must keep track of DeviceTaintRules itself and handle this scenario. In this scenario it is impossible to evaluate CEL expressions because the necessary device attributes aren't available. We could: - Copy them in the allocation result: too large, big change. - Limit usage of CEL expressions to rules with no eviction: inconsistent. - Remove the fields which cannot be supported well. The last option is chosen. The tracker is now no longer needed by the eviction controller. Reading directly from the informer means that we cannot assume that pointers are consistent. We have to track ResourceSlices by their name, not their pointer.
--- pkg/apis/resource/types.go | 15 -- pkg/apis/resource/validation/validation.go | 16 -- .../validation_devicetaintrule_test.go | 80 +------ .../device_taint_eviction.go | 200 ++++++++++++++---- .../device_taint_eviction_test.go | 49 ++++- .../src/k8s.io/api/resource/v1alpha3/types.go | 14 +- .../resourceslice/tracker/tracker.go | 71 ------- .../resourceslice/tracker/tracker_test.go | 132 +----------- 8 files changed, 221 insertions(+), 356 deletions(-) diff --git a/pkg/apis/resource/types.go b/pkg/apis/resource/types.go index 016e7c236de..8ea5ffa7fb4 100644 --- a/pkg/apis/resource/types.go +++ b/pkg/apis/resource/types.go @@ -1918,13 +1918,6 @@ type DeviceTaintRuleSpec struct { // The empty selector matches all devices. Without a selector, no devices // are matched. type DeviceTaintSelector struct { - // If DeviceClassName is set, the selectors defined there must be - // satisfied by a device to be selected. This field corresponds - // to class.metadata.name. - // - // +optional - DeviceClassName *string - // If driver is set, only devices from that driver are selected. // This fields corresponds to slice.spec.driver. // @@ -1951,14 +1944,6 @@ type DeviceTaintSelector struct { // // +optional Device *string - - // Selectors contains the same selection criteria as a ResourceClaim. - // Currently, CEL expressions are supported. All of these selectors - // must be satisfied. - // - // +optional - // +listType=atomic - Selectors []DeviceSelector } // DeviceTaintRuleStatus provides information about an on-going pod eviction. 
diff --git a/pkg/apis/resource/validation/validation.go b/pkg/apis/resource/validation/validation.go index 313f829c37e..eb54f9c77d3 100644 --- a/pkg/apis/resource/validation/validation.go +++ b/pkg/apis/resource/validation/validation.go @@ -1395,9 +1395,6 @@ func validateDeviceTaintSelector(filter, oldFilter *resource.DeviceTaintSelector if filter == nil { return allErrs } - if filter.DeviceClassName != nil { - allErrs = append(allErrs, validateDeviceClassName(*filter.DeviceClassName, fldPath.Child("deviceClassName"))...) - } if filter.Driver != nil { allErrs = append(allErrs, validateDriverName(*filter.Driver, fldPath.Child("driver"))...) } @@ -1408,19 +1405,6 @@ func validateDeviceTaintSelector(filter, oldFilter *resource.DeviceTaintSelector allErrs = append(allErrs, validateDeviceName(*filter.Device, fldPath.Child("device"))...) } - // If the selectors are exactly as before, we treat the CEL expressions as "stored". - // Any change, including merely reordering selectors, triggers validation as new - // expressions. - stored := false - if oldFilter != nil { - stored = apiequality.Semantic.DeepEqual(filter.Selectors, oldFilter.Selectors) - } - allErrs = append(allErrs, validateSlice(filter.Selectors, resource.DeviceSelectorsMaxSize, - func(selector resource.DeviceSelector, fldPath *field.Path) field.ErrorList { - return validateSelector(selector, fldPath, stored) - }, - fldPath.Child("selectors"))...) - return allErrs } diff --git a/pkg/apis/resource/validation/validation_devicetaintrule_test.go b/pkg/apis/resource/validation/validation_devicetaintrule_test.go index 48c606c388b..815b6c515e4 100644 --- a/pkg/apis/resource/validation/validation_devicetaintrule_test.go +++ b/pkg/apis/resource/validation/validation_devicetaintrule_test.go @@ -17,7 +17,6 @@ limitations under the License. 
package validation import ( - "strings" "testing" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -37,10 +36,9 @@ func testDeviceTaintRule(name string, spec resourceapi.DeviceTaintRuleSpec) *res var validDeviceTaintRuleSpec = resourceapi.DeviceTaintRuleSpec{ DeviceSelector: &resourceapi.DeviceTaintSelector{ - DeviceClassName: ptr.To(goodName), - Driver: ptr.To("test.example.com"), - Pool: ptr.To(goodName), - Device: ptr.To(goodName), + Driver: ptr.To("test.example.com"), + Pool: ptr.To(goodName), + Device: ptr.To(goodName), }, Taint: resourceapi.DeviceTaint{ Key: "example.com/taint", @@ -187,14 +185,6 @@ func TestValidateDeviceTaint(t *testing.T) { return taintRule }(), }, - "bad-class": { - wantFailures: field.ErrorList{field.Invalid(field.NewPath("spec", "deviceSelector", "deviceClassName"), badName, "a lowercase RFC 1123 subdomain must consist of lower case alphanumeric characters, '-' or '.', and must start and end with an alphanumeric character (e.g. 'example.com', regex used for validation is '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*')")}, - taintRule: func() *resourceapi.DeviceTaintRule { - taintRule := testDeviceTaintRule(goodName, validDeviceTaintRuleSpec) - taintRule.Spec.DeviceSelector.DeviceClassName = ptr.To(badName) - return taintRule - }(), - }, "bad-driver": { wantFailures: field.ErrorList{field.Invalid(field.NewPath("spec", "deviceSelector", "driver"), badName, "a lowercase RFC 1123 subdomain must consist of lower case alphanumeric characters, '-' or '.', and must start and end with an alphanumeric character (e.g. 
'example.com', regex used for validation is '[a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*')")}, taintRule: func() *resourceapi.DeviceTaintRule { @@ -219,70 +209,6 @@ func TestValidateDeviceTaint(t *testing.T) { return taintRule }(), }, - "CEL-compile-errors": { - wantFailures: field.ErrorList{ - field.Invalid(field.NewPath("spec", "deviceSelector", "selectors").Index(1).Child("cel", "expression"), `device.attributes[true].someBoolean`, "compilation failed: ERROR: :1:18: found no matching overload for '_[_]' applied to '(map(string, map(string, any)), bool)'\n | device.attributes[true].someBoolean\n | .................^"), - }, - taintRule: func() *resourceapi.DeviceTaintRule { - taintRule := testDeviceTaintRule(goodName, validDeviceTaintRuleSpec) - taintRule.Spec.DeviceSelector.Selectors = []resourceapi.DeviceSelector{ - { - // Good selector. - CEL: &resourceapi.CELDeviceSelector{ - Expression: `device.driver == "dra.example.com"`, - }, - }, - { - // Bad selector. - CEL: &resourceapi.CELDeviceSelector{ - Expression: `device.attributes[true].someBoolean`, - }, - }, - } - return taintRule - }(), - }, - "CEL-length": { - wantFailures: field.ErrorList{ - field.TooLong(field.NewPath("spec", "deviceSelector", "selectors").Index(1).Child("cel", "expression"), "" /*unused*/, resourceapi.CELSelectorExpressionMaxLength), - }, - taintRule: func() *resourceapi.DeviceTaintRule { - taintRule := testDeviceTaintRule(goodName, validDeviceTaintRuleSpec) - expression := `device.driver == ""` - taintRule.Spec.DeviceSelector.Selectors = []resourceapi.DeviceSelector{ - { - // Good selector. - CEL: &resourceapi.CELDeviceSelector{ - Expression: strings.ReplaceAll(expression, `""`, `"`+strings.Repeat("x", resourceapi.CELSelectorExpressionMaxLength-len(expression))+`"`), - }, - }, - { - // Too long by one selector. 
- CEL: &resourceapi.CELDeviceSelector{ - Expression: strings.ReplaceAll(expression, `""`, `"`+strings.Repeat("x", resourceapi.CELSelectorExpressionMaxLength-len(expression)+1)+`"`), - }, - }, - } - return taintRule - }(), - }, - "CEL-cost": { - wantFailures: field.ErrorList{ - field.Forbidden(field.NewPath("spec", "deviceSelector", "selectors").Index(0).Child("cel", "expression"), "too complex, exceeds cost limit"), - }, - taintRule: func() *resourceapi.DeviceTaintRule { - claim := testDeviceTaintRule(goodName, validDeviceTaintRuleSpec) - claim.Spec.DeviceSelector.Selectors = []resourceapi.DeviceSelector{ - { - CEL: &resourceapi.CELDeviceSelector{ - // From https://github.com/kubernetes/kubernetes/blob/50fc400f178d2078d0ca46aee955ee26375fc437/test/integration/apiserver/cel/validatingadmissionpolicy_test.go#L2150. - Expression: `[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(x, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(y, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z2, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z3, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z4, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10].all(z5, int('1'.find('[0-9]*')) < 100)))))))`, - }, - }, - } - return claim - }(), - }, // Minimal tests for DeviceTaint. Full coverage of validateDeviceTaint is in ResourceSlice test. 
"valid-taint": { taintRule: func() *resourceapi.DeviceTaintRule { diff --git a/pkg/controller/devicetainteviction/device_taint_eviction.go b/pkg/controller/devicetainteviction/device_taint_eviction.go index 103a0d1a161..6c9b201773e 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction.go @@ -18,8 +18,8 @@ package devicetainteviction import ( "context" - "errors" "fmt" + "iter" "math" "slices" "strings" @@ -29,14 +29,15 @@ import ( v1 "k8s.io/api/core/v1" resourceapi "k8s.io/api/resource/v1" + resourcealpha "k8s.io/api/resource/v1alpha3" apiequality "k8s.io/apimachinery/pkg/api/equality" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/diff" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" - utilfeature "k8s.io/apiserver/pkg/util/feature" coreinformers "k8s.io/client-go/informers/core/v1" resourceinformers "k8s.io/client-go/informers/resource/v1" resourcealphainformers "k8s.io/client-go/informers/resource/v1alpha3" @@ -44,15 +45,14 @@ import ( "k8s.io/client-go/kubernetes/scheme" v1core "k8s.io/client-go/kubernetes/typed/core/v1" corelisters "k8s.io/client-go/listers/core/v1" + resourcealphalisters "k8s.io/client-go/listers/resource/v1alpha3" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" "k8s.io/dynamic-resource-allocation/resourceclaim" - resourceslicetracker "k8s.io/dynamic-resource-allocation/resourceslice/tracker" "k8s.io/klog/v2" apipod "k8s.io/kubernetes/pkg/api/v1/pod" "k8s.io/kubernetes/pkg/controller/devicetainteviction/metrics" "k8s.io/kubernetes/pkg/controller/tainteviction" - "k8s.io/kubernetes/pkg/features" utilpod "k8s.io/kubernetes/pkg/util/pod" ) @@ -92,6 +92,7 @@ type Controller struct { sliceInformer resourceinformers.ResourceSliceInformer taintInformer 
resourcealphainformers.DeviceTaintRuleInformer classInformer resourceinformers.DeviceClassInformer + ruleLister resourcealphalisters.DeviceTaintRuleLister haveSynced []cache.InformerSynced metrics metrics.Metrics @@ -117,7 +118,8 @@ type poolID struct { } type pool struct { - slices sets.Set[*resourceapi.ResourceSlice] + // slices maps the global name to the current instance under that name. + slices map[string]*resourceapi.ResourceSlice maxGeneration int64 } @@ -127,10 +129,10 @@ func (p *pool) addSlice(slice *resourceapi.ResourceSlice) { return } if p.slices == nil { - p.slices = sets.New[*resourceapi.ResourceSlice]() + p.slices = make(map[string]*resourceapi.ResourceSlice) p.maxGeneration = math.MinInt64 } - p.slices.Insert(slice) + p.slices[slice.Name] = slice // Adding a slice can only increase the generation. if slice.Spec.Pool.Generation > p.maxGeneration { @@ -143,13 +145,13 @@ func (p *pool) removeSlice(slice *resourceapi.ResourceSlice) { if slice == nil { return } - p.slices.Delete(slice) + delete(p.slices, slice.Name) // Removing a slice might have decreased the generation to // that of some other slice. if slice.Spec.Pool.Generation == p.maxGeneration { maxGeneration := int64(math.MinInt64) - for slice := range p.slices { + for _, slice := range p.slices { if slice.Spec.Pool.Generation > maxGeneration { maxGeneration = slice.Spec.Pool.Generation } @@ -162,7 +164,7 @@ func (p *pool) removeSlice(slice *resourceapi.ResourceSlice) { // The result is sorted by device name. func (p pool) getTaintedDevices() []taintedDevice { var buffer []taintedDevice - for slice := range p.slices { + for _, slice := range p.slices { if slice.Spec.Pool.Generation != p.maxGeneration { continue } @@ -185,7 +187,7 @@ func (p pool) getTaintedDevices() []taintedDevice { // getDevice looks up one device by name. Out-dated slices are ignored. 
func (p pool) getDevice(deviceName string) *resourceapi.Device { - for slice := range p.slices { + for _, slice := range p.slices { if slice.Spec.Pool.Generation != p.maxGeneration { continue } @@ -300,6 +302,7 @@ func New(c clientset.Interface, podInformer coreinformers.PodInformer, claimInfo sliceInformer: sliceInformer, taintInformer: taintInformer, classInformer: classInformer, + ruleLister: taintInformer.Lister(), allocatedClaims: make(map[types.NamespacedName]allocatedClaim), pools: make(map[poolID]pool), // Instantiate all informers now to ensure that they get started. @@ -420,7 +423,7 @@ func (tc *Controller) Run(ctx context.Context) error { AddFunc: func(obj any) { pod, ok := obj.(*v1.Pod) if !ok { - logger.Error(nil, "Expected ResourcePod", "actual", fmt.Sprintf("%T", obj)) + logger.Error(nil, "Expected Pod", "actual", fmt.Sprintf("%T", obj)) return } mutex.Lock() @@ -463,31 +466,55 @@ func (tc *Controller) Run(ctx context.Context) error { }() tc.haveSynced = append(tc.haveSynced, podHandler.HasSynced) - opts := resourceslicetracker.Options{ - EnableDeviceTaints: true, - EnableConsumableCapacity: utilfeature.DefaultFeatureGate.Enabled(features.DRAConsumableCapacity), - SliceInformer: tc.sliceInformer, - TaintInformer: tc.taintInformer, - ClassInformer: tc.classInformer, - KubeClient: tc.client, - } - sliceTracker, err := resourceslicetracker.StartTracker(ctx, opts) + ruleHandler, err := tc.taintInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj any) { + rule, ok := obj.(*resourcealpha.DeviceTaintRule) + if !ok { + logger.Error(nil, "Expected DeviceTaintRule", "actual", fmt.Sprintf("%T", obj)) + return + } + mutex.Lock() + defer mutex.Unlock() + tc.handleRuleChange(nil, rule) + }, + UpdateFunc: func(oldObj, newObj any) { + oldRule, ok := oldObj.(*resourcealpha.DeviceTaintRule) + if !ok { + logger.Error(nil, "Expected DeviceTaintRule", "actual", fmt.Sprintf("%T", oldObj)) + return + } + newRule, ok :=
newObj.(*resourcealpha.DeviceTaintRule) + if !ok { + logger.Error(nil, "Expected DeviceTaintRule", "actual", fmt.Sprintf("%T", newObj)) + return + } + mutex.Lock() + defer mutex.Unlock() + tc.handleRuleChange(oldRule, newRule) + }, + DeleteFunc: func(obj any) { + if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { + obj = tombstone.Obj + } + rule, ok := obj.(*resourcealpha.DeviceTaintRule) + if !ok { + logger.Error(nil, "Expected DeviceTaintRule", "actual", fmt.Sprintf("%T", obj)) + return + } + mutex.Lock() + defer mutex.Unlock() + tc.handleRuleChange(rule, nil) + }, + }) if err != nil { - return fmt.Errorf("initialize ResourceSlice tracker: %w", err) + return fmt.Errorf("adding DeviceTaintRule event handler: %w", err) } - tc.haveSynced = append(tc.haveSynced, sliceTracker.HasSynced) - defer sliceTracker.Stop() + defer func() { + _ = tc.taintInformer.Informer().RemoveEventHandler(ruleHandler) + }() + tc.haveSynced = append(tc.haveSynced, ruleHandler.HasSynced) - // Wait for tracker to sync before we react to events. - // This doesn't have to be perfect, it merely avoids unnecessary - // work which might be done as events get emitted for intermediate - // state. - if !cache.WaitForNamedCacheSyncWithContext(ctx, tc.haveSynced...)
{ - return errors.New("wait for cache sync timed out") - } - logger.V(1).Info("Underlying informers have synced") - - _, err = sliceTracker.AddEventHandler(cache.ResourceEventHandlerFuncs{ + sliceHandler, err := tc.sliceInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj any) { slice, ok := obj.(*resourceapi.ResourceSlice) if !ok { @@ -525,11 +551,18 @@ func (tc *Controller) Run(ctx context.Context) error { }, }) if err != nil { - return fmt.Errorf("add slice event handler: %w", err) + return fmt.Errorf("adding slice event handler: %w", err) } + defer func() { + _ = tc.sliceInformer.Informer().RemoveEventHandler(sliceHandler) + }() + tc.haveSynced = append(tc.haveSynced, sliceHandler.HasSynced) - // sliceTracker.AddEventHandler blocked while delivering events for all known - // ResourceSlices. Therefore our own state is up-to-date once we get here. + if !cache.WaitForNamedCacheSyncWithContext(ctx, tc.haveSynced...) { + // If we get here, the caller canceled the context. This is not an error. + return nil + } + logger.V(1).Info("Underlying informers have synced") tc.hasSynced.Store(1) <-ctx.Done() @@ -627,23 +660,15 @@ func (tc *Controller) evictionTime(claim *resourceapi.ResourceClaim) *metav1.Tim for _, allocatedDevice := range allocation.Devices.Results { id := poolID{driverName: allocatedDevice.Driver, poolName: allocatedDevice.Pool} device := tc.pools[id].getDevice(allocatedDevice.Device) - if device == nil { - // Unknown device? Can't be tainted... 
- continue - } nextTaint: - for _, taint := range device.Taints { - if taint.Effect != resourceapi.DeviceTaintEffectNoExecute { - continue - } - + for taint := range tc.allEvictingDeviceTaints(allocatedDevice, device) { newEvictionTime := taint.TimeAdded haveToleration := false tolerationSeconds := int64(math.MaxInt64) for _, toleration := range allocatedDevice.Tolerations { if toleration.Effect == resourceapi.DeviceTaintEffectNoExecute && - resourceclaim.ToleratesTaint(toleration, taint) { + resourceclaim.ToleratesTaint(toleration, *taint) { if toleration.TolerationSeconds == nil { // Tolerate forever -> ignore taint. continue nextTaint @@ -677,6 +702,87 @@ func (tc *Controller) evictionTime(claim *resourceapi.ResourceClaim) *metav1.Tim return evictionTime } +// allEvictingDeviceTaints allows iterating over all DeviceTaintRules with NoExecute effect which affect the allocated device. +// A taint may come from either the ResourceSlice informer (not the tracker!) or from a DeviceTaintRule, but not both. +func (tc *Controller) allEvictingDeviceTaints(allocatedDevice resourceapi.DeviceRequestAllocationResult, device *resourceapi.Device) iter.Seq[*resourceapi.DeviceTaint] { + rules, err := tc.ruleLister.List(labels.Everything()) + // TODO: instead of listing and handling an error, keep track of rules in the informer event handler? 
+ if err != nil { + panic(err) + } + + return func(yield func(*resourceapi.DeviceTaint) bool) { + if device != nil { + for i := range device.Taints { + taint := &device.Taints[i] + if taint.Effect != resourceapi.DeviceTaintEffectNoExecute { + continue + } + if !yield(taint) { + return + } + } + } + + for _, rule := range rules { + if rule.Spec.Taint.Effect != resourcealpha.DeviceTaintEffectNoExecute { + continue + } + selector := rule.Spec.DeviceSelector + if selector == nil { + continue + } + if selector.Driver != nil && *selector.Driver != allocatedDevice.Driver || + selector.Pool != nil && *selector.Pool != allocatedDevice.Pool || + selector.Device != nil && *selector.Device != allocatedDevice.Device { + continue + } + if !yield( + // TODO when GA: directly point to rule.Spec.Taint. + &resourceapi.DeviceTaint{ + Key: rule.Spec.Taint.Key, + Value: rule.Spec.Taint.Value, + Effect: resourceapi.DeviceTaintEffect(rule.Spec.Taint.Effect), + TimeAdded: rule.Spec.Taint.TimeAdded, + }, + ) { + return + } + } + } +} + +func (tc *Controller) handleRuleChange(oldRule, newRule *resourcealpha.DeviceTaintRule) { + rule := newRule + if rule == nil { + rule = oldRule + } + name := newNamespacedName(rule) + if tc.eventLogger != nil { + // This is intentionally very verbose for debugging. + tc.eventLogger.Info("DeviceTaintRule changed", "ruleObject", name, "oldRule", klog.Format(oldRule), "newRule", klog.Format(newRule), "diff", diff.Diff(oldRule, newRule)) + } + + if oldRule != nil && + newRule != nil && + oldRule.UID == newRule.UID && + apiequality.Semantic.DeepEqual(&oldRule.Spec, &newRule.Spec) { + return + } + + // Rule spec changes should be rare. Simply do a brute-force re-evaluation of all allocated claims. 
+ for name, oldAllocatedClaim := range tc.allocatedClaims { + newAllocatedClaim := allocatedClaim{ + ResourceClaim: oldAllocatedClaim.ResourceClaim, + } + newAllocatedClaim.evictionTime = tc.evictionTime(oldAllocatedClaim.ResourceClaim) + tc.allocatedClaims[name] = newAllocatedClaim + if !newAllocatedClaim.evictionTime.Equal(oldAllocatedClaim.evictionTime) { + tc.handlePods(newAllocatedClaim.ResourceClaim) + } + } +} + func (tc *Controller) handleSliceChange(oldSlice, newSlice *resourceapi.ResourceSlice) { slice := newSlice if slice == nil { diff --git a/pkg/controller/devicetainteviction/device_taint_eviction_test.go b/pkg/controller/devicetainteviction/device_taint_eviction_test.go index b9024b58b28..32056fec003 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction_test.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction_test.go @@ -39,11 +39,11 @@ import ( v1 "k8s.io/api/core/v1" resourceapi "k8s.io/api/resource/v1" + resourcealpha "k8s.io/api/resource/v1alpha3" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/sets" "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes/fake" core "k8s.io/client-go/testing" @@ -148,14 +148,14 @@ func (s state) slicesAsMap() map[poolID]pool { id := poolID{driverName: slice.Spec.Driver, poolName: slice.Spec.Pool.Name} pool := pools[id] if pool.slices == nil { - pool.slices = sets.New[*resourceapi.ResourceSlice]() + pool.slices = make(map[string]*resourceapi.ResourceSlice) } - pool.slices.Insert(slice) + pool.slices[slice.Name] = slice pools[id] = pool } for id, pool := range pools { maxGeneration := int64(math.MinInt64) - for slice := range pool.slices { + for _, slice := range pool.slices { if slice.Spec.Pool.Generation > maxGeneration { maxGeneration = slice.Spec.Pool.Generation } @@ -289,6 +289,22 @@ var ( slice.Spec.Pool.Generation++ return slice }() + 
ruleEvict = &resourcealpha.DeviceTaintRule{ + ObjectMeta: metav1.ObjectMeta{ + Name: "evict", + }, + + Spec: resourcealpha.DeviceTaintRuleSpec{ + DeviceSelector: &resourcealpha.DeviceTaintSelector{ + Driver: ptr.To(driver), + }, + Taint: resourcealpha.DeviceTaint{ + Key: "unhealthy", + Effect: resourcealpha.DeviceTaintEffectNoExecute, + TimeAdded: &taintTime, + }, + }, + } claim = st.MakeResourceClaim(). Name(claimName). Namespace(namespace). @@ -455,7 +471,7 @@ func TestHandlers(t *testing.T) { allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim}}, }, }, - "tainted-claim": { + "tainted-claim-through-resourceslice": { events: []any{ add(sliceTainted), add(slice2), @@ -466,6 +482,15 @@ func TestHandlers(t *testing.T) { allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim, evictionTime: &taintTime}}, }, }, + "tainted-claim-through-rule": { + events: []any{ + add(ruleEvict), + add(inUseClaim), + }, + finalState: state{ + allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim, evictionTime: &taintTime}}, + }, + }, "evict-pod-resourceclaim": { events: []any{ add(sliceTainted), @@ -1178,14 +1203,13 @@ func testHandlers(tContext *testContext, tc testCase) { } func applyEventPair(tContext *testContext, event any) { - store := tContext.informerFactory.Core().V1().Pods().Informer().GetStore() - switch pair := event.(type) { case [2]*resourceapi.ResourceSlice: tContext.handleSliceChange(pair[0], pair[1]) case [2]*resourceapi.ResourceClaim: tContext.handleClaimChange(pair[0], pair[1]) case [2]*v1.Pod: + store := tContext.informerFactory.Core().V1().Pods().Informer().GetStore() switch { case pair[0] != nil && pair[1] != nil: tContext.ExpectNoError(store.Update(pair[1])) @@ -1195,6 +1219,17 @@ func applyEventPair(tContext *testContext, event any) { tContext.ExpectNoError(store.Add(pair[1])) } tContext.handlePodChange(pair[0], pair[1]) + case [2]*resourcealpha.DeviceTaintRule: + store := 
tContext.informerFactory.Resource().V1alpha3().DeviceTaintRules().Informer().GetStore() + switch { + case pair[0] != nil && pair[1] != nil: + tContext.ExpectNoError(store.Update(pair[1])) + case pair[0] != nil: + tContext.ExpectNoError(store.Delete(pair[0])) + default: + tContext.ExpectNoError(store.Add(pair[1])) + } + tContext.handleRuleChange(pair[0], pair[1]) default: tContext.Fatalf("unexpected event type %T", event) } diff --git a/staging/src/k8s.io/api/resource/v1alpha3/types.go b/staging/src/k8s.io/api/resource/v1alpha3/types.go index 3ea3e7bafd5..ba02edabaa6 100644 --- a/staging/src/k8s.io/api/resource/v1alpha3/types.go +++ b/staging/src/k8s.io/api/resource/v1alpha3/types.go @@ -234,7 +234,12 @@ type DeviceTaintSelector struct { // to class.metadata.name. // // +optional - DeviceClassName *string `json:"deviceClassName,omitempty" protobuf:"bytes,1,opt,name=deviceClassName"` + // + // Tombstoned since 1.35 because it turned out that supporting this in all cases + // would depend on copying the device attributes into the ResourceClaim allocation + // result. Without that the eviction controller cannot evaluate these CEL expressions. + // + // DeviceClassName *string `json:"deviceClassName,omitempty" protobuf:"bytes,1,opt,name=deviceClassName"` // If driver is set, only devices from that driver are selected. // This fields corresponds to slice.spec.driver. @@ -269,7 +274,12 @@ type DeviceTaintSelector struct { // // +optional // +listType=atomic - Selectors []DeviceSelector `json:"selectors,omitempty" protobuf:"bytes,5,rep,name=selectors"` + // + // Tombstoned since 1.35 because it turned out that supporting this in all cases + // would depend on copying the device attributes into the ResourceClaim allocation + // result. Without that the eviction controller cannot evaluate these CEL expressions. 
+ // + // Selectors []DeviceSelector `json:"selectors,omitempty" protobuf:"bytes,5,rep,name=selectors"` } // DeviceTaintRuleStatus provides information about an on-going pod eviction. diff --git a/staging/src/k8s.io/dynamic-resource-allocation/resourceslice/tracker/tracker.go b/staging/src/k8s.io/dynamic-resource-allocation/resourceslice/tracker/tracker.go index b27b3d0392c..57dd0bf6a9d 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/resourceslice/tracker/tracker.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/resourceslice/tracker/tracker.go @@ -38,7 +38,6 @@ import ( resourcelisters "k8s.io/client-go/listers/resource/v1" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" - "k8s.io/dynamic-resource-allocation/cel" "k8s.io/klog/v2" "k8s.io/utils/buffer" "k8s.io/utils/ptr" @@ -65,7 +64,6 @@ type Tracker struct { deviceTaintsHandle cache.ResourceEventHandlerRegistration deviceClasses cache.SharedIndexInformer deviceClassesHandle cache.ResourceEventHandlerRegistration - celCache *cel.Cache patchedResourceSlices cache.Store broadcaster record.EventBroadcaster recorder record.EventRecorder @@ -155,7 +153,6 @@ func newTracker(ctx context.Context, opts Options) (finalT *Tracker, finalErr er resourceSlices: opts.SliceInformer.Informer(), deviceTaints: opts.TaintInformer.Informer(), deviceClasses: opts.ClassInformer.Informer(), - celCache: cel.NewCache(10, cel.Features{EnableConsumableCapacity: opts.EnableConsumableCapacity}), patchedResourceSlices: cache.NewStore(cache.MetaNamespaceKeyFunc), handleError: utilruntime.HandleErrorWithContext, eventQueue: *buffer.NewRing[func()](buffer.RingOptions{InitialSize: 0, NormalSize: 4}), @@ -644,8 +641,6 @@ func (t *Tracker) applyPatches(ctx context.Context, slice *resourceapi.ResourceS logger.V(6).Info("processing DeviceTaintRule") deviceSelector := taintRule.Spec.DeviceSelector - var deviceClassExprs []cel.CompilationResult - var selectorExprs []cel.CompilationResult var deviceName *string if 
deviceSelector != nil { if deviceSelector.Driver != nil && *deviceSelector.Driver != slice.Spec.Driver { @@ -657,32 +652,7 @@ func (t *Tracker) applyPatches(ctx context.Context, slice *resourceapi.ResourceS continue } deviceName = deviceSelector.Device - if deviceSelector.DeviceClassName != nil { - logger := logger.WithValues("deviceClassName", *deviceSelector.DeviceClassName) - classObj, exists, err := t.deviceClasses.GetIndexer().GetByKey(*deviceSelector.DeviceClassName) - if err != nil { - return nil, fmt.Errorf("failed to get device class %s for DeviceTaintRule %s", *deviceSelector.DeviceClassName, taintRule.Name) - } - if !exists { - logger.V(7).Info("DeviceTaintRule does not apply, DeviceClass does not exist") - continue - } - class := classObj.(*resourceapi.DeviceClass) - for _, selector := range class.Spec.Selectors { - if selector.CEL != nil { - expr := t.celCache.GetOrCompile(selector.CEL.Expression) - deviceClassExprs = append(deviceClassExprs, expr) - } - } - } - for _, selector := range deviceSelector.Selectors { - if selector.CEL != nil { - expr := t.celCache.GetOrCompile(selector.CEL.Expression) - selectorExprs = append(selectorExprs, expr) - } - } } - devices: for dIndex, device := range slice.Spec.Devices { deviceID := deviceID(slice.Spec.Driver, slice.Spec.Pool.Name, device.Name) logger := logger.WithValues("device", deviceID) @@ -692,47 +662,6 @@ func (t *Tracker) applyPatches(ctx context.Context, slice *resourceapi.ResourceS continue } - for i, expr := range deviceClassExprs { - if expr.Error != nil { - // Could happen if some future apiserver accepted some - // future expression and then got downgraded. Normally - // the "stored expression" mechanism prevents that, but - // this code here might be more than one release older - // than the cluster it runs in. 
- return nil, fmt.Errorf("DeviceTaintRule %s: class %s: selector #%d: CEL compile error: %w", taintRule.Name, *deviceSelector.DeviceClassName, i, expr.Error) - } - matches, details, err := expr.DeviceMatches(ctx, cel.Device{Driver: slice.Spec.Driver, Attributes: device.Attributes, Capacity: device.Capacity}) - logger.V(7).Info("CEL result", "class", *deviceSelector.DeviceClassName, "selector", i, "expression", expr.Expression, "matches", matches, "actualCost", ptr.Deref(details.ActualCost(), 0), "err", err) - if err != nil { - continue devices - } - if !matches { - continue devices - } - } - - for i, expr := range selectorExprs { - if expr.Error != nil { - // Could happen if some future apiserver accepted some - // future expression and then got downgraded. Normally - // the "stored expression" mechanism prevents that, but - // this code here might be more than one release older - // than the cluster it runs in. - return nil, fmt.Errorf("DeviceTaintRule %s: selector #%d: CEL compile error: %w", taintRule.Name, i, expr.Error) - } - matches, details, err := expr.DeviceMatches(ctx, cel.Device{Driver: slice.Spec.Driver, Attributes: device.Attributes, Capacity: device.Capacity}) - logger.V(7).Info("CEL result", "selector", i, "expression", expr.Expression, "matches", matches, "actualCost", ptr.Deref(details.ActualCost(), 0), "err", err) - if err != nil { - if t.recorder != nil { - t.recorder.Eventf(taintRule, v1.EventTypeWarning, "CELRuntimeError", "selector #%d: runtime error: %v", i, err) - } - continue devices - } - if !matches { - continue devices - } - } - logger.V(6).Info("applying matching DeviceTaintRule") // TODO: remove conversion once taint is already in the right API package. 
diff --git a/staging/src/k8s.io/dynamic-resource-allocation/resourceslice/tracker/tracker_test.go b/staging/src/k8s.io/dynamic-resource-allocation/resourceslice/tracker/tracker_test.go index 0e2c13c3b34..d182e81abfb 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/resourceslice/tracker/tracker_test.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/resourceslice/tracker/tracker_test.go @@ -316,37 +316,10 @@ var ( } return rule } - taintCELSelectedDevicesRule = func(rule *resourcealphaapi.DeviceTaintRule, exprs ...string) *resourcealphaapi.DeviceTaintRule { - rule = rule.DeepCopy() - var selectors []resourcealphaapi.DeviceSelector - for _, expr := range exprs { - selectors = append(selectors, resourcealphaapi.DeviceSelector{ - CEL: &resourcealphaapi.CELDeviceSelector{ - Expression: expr, - }, - }) - } - rule.Spec.DeviceSelector = &resourcealphaapi.DeviceTaintSelector{ - Selectors: selectors, - } - return rule - } - taintDeviceClassRule = func(rule *resourcealphaapi.DeviceTaintRule, deviceClassName string) *resourcealphaapi.DeviceTaintRule { - rule = rule.DeepCopy() - rule.Spec.DeviceSelector = &resourcealphaapi.DeviceTaintSelector{ - DeviceClassName: &deviceClassName, - } - return rule - } - taintPool1DevicesRule = taintPoolDevicesRule(taintAllDevicesRule, pool1) - taintPool2DevicesRule = taintPoolDevicesRule(taintAllDevicesRule, pool2) - taintDriver1DevicesRule = taintDriverDevicesRule(taintAllDevicesRule, driver1) - taintDevice1Rule = taintNamedDevicesRule(taintAllDevicesRule, device1Name) - taintDriver1DevicesCELRule = taintCELSelectedDevicesRule(taintAllDevicesRule, `device.driver == "`+driver1+`"`) - taintNoDevicesCELRule = taintCELSelectedDevicesRule(taintAllDevicesRule, `true`, `false`, `true`) - taintNoDevicesCELRuntimeErrorRule = taintCELSelectedDevicesRule(taintAllDevicesRule, `device.attributes["test.example.com"].deviceAttr`) - taintNoDevicesInvalidCELRule = taintCELSelectedDevicesRule(taintAllDevicesRule, `invalid`) - 
taintDeviceClass1Rule = taintDeviceClassRule(taintAllDevicesRule, deviceClass1.Name) + taintPool1DevicesRule = taintPoolDevicesRule(taintAllDevicesRule, pool1) + taintPool2DevicesRule = taintPoolDevicesRule(taintAllDevicesRule, pool2) + taintDriver1DevicesRule = taintDriverDevicesRule(taintAllDevicesRule, driver1) + taintDevice1Rule = taintNamedDevicesRule(taintAllDevicesRule, device1Name) ) func TestListPatchedResourceSlices(t *testing.T) { @@ -519,102 +492,19 @@ func TestListPatchedResourceSlices(t *testing.T) { {event: handlerEventAdd, newObj: slice2}, }, }, - "add-attribute-for-selector": { - events: []any{ - add(taintDriver1DevicesCELRule), - add(slice1), - add(slice2), - }, - expectedPatchedSlices: []*resourceapi.ResourceSlice{ - slice1Tainted, - slice2, - }, - expectedHandlerEvents: []handlerEvent{ - {event: handlerEventAdd, newObj: slice1Tainted}, - {event: handlerEventAdd, newObj: slice2}, - }, - }, - "selector-does-not-match": { - events: []any{ - add(taintNoDevicesCELRule), - add(slice1), - }, - expectedPatchedSlices: []*resourceapi.ResourceSlice{ - slice1, - }, - expectedHandlerEvents: []handlerEvent{ - {event: handlerEventAdd, newObj: slice1}, - }, - }, - "runtime-CEL-errors-skip-devices": { - events: []any{ - add(taintNoDevicesCELRuntimeErrorRule), - add(slice1), - }, - expectedPatchedSlices: []*resourceapi.ResourceSlice{ - slice1, - }, - expectEvents: func(t *assert.CollectT, events *v1.EventList) { - if !assert.Len(t, events.Items, 1) { - return - } - assert.Equal(t, taintNoDevicesCELRuntimeErrorRule.Name, events.Items[0].InvolvedObject.Name) - assert.Equal(t, "CELRuntimeError", events.Items[0].Reason) - }, - expectedHandlerEvents: []handlerEvent{ - {event: handlerEventAdd, newObj: slice1}, - }, - }, - "invalid-CEL-expression-throws-error": { - events: []any{ - []any{ - add(taintNoDevicesInvalidCELRule), - add(slice1), - }, - }, - expectedPatchedSlices: []*resourceapi.ResourceSlice{}, - expectUnhandledErrors: func(t *testing.T, errs []error) { - if 
!assert.Len(t, errs, 1) { - return - } - assert.ErrorContains(t, errs[0], "CEL compile error") - }, - }, - "add-taint-for-device-class": { - events: []any{ - add(deviceClass1), - add(taintDeviceClass1Rule), - add(slice1), - add(slice2), - }, - expectedPatchedSlices: []*resourceapi.ResourceSlice{ - slice1Tainted, - slice2, - }, - expectedHandlerEvents: []handlerEvent{ - {event: handlerEventAdd, newObj: slice1Tainted}, - {event: handlerEventAdd, newObj: slice2}, - }, - }, "filter-all-criteria": { events: []any{ add(deviceClass1), add( - taintDeviceClassRule( - taintDriverDevicesRule( - taintPoolDevicesRule( - taintNamedDevicesRule( - taintCELSelectedDevicesRule( - taintAllDevicesRule, - `true`, - ), - device1Name, - ), - pool1, + taintDriverDevicesRule( + taintPoolDevicesRule( + taintNamedDevicesRule( + taintAllDevicesRule, + device1Name, ), - driver1, + pool1, ), - deviceClass1.Name, + driver1, ), ), add(slice1), From 75c9186792811a5b4bc38a5171283e2f58a718a0 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Tue, 28 Oct 2025 21:20:34 +0100 Subject: [PATCH 04/11] generated files --- api/discovery/aggregated_v2.json | 15 + .../apis__resource.k8s.io__v1alpha3.json | 11 + api/openapi-spec/swagger.json | 271 +++++++++-- .../v3/apis__resource.k8s.io__v1_openapi.json | 6 +- ...is__resource.k8s.io__v1alpha3_openapi.json | 437 ++++++++++++++++-- ...pis__resource.k8s.io__v1beta1_openapi.json | 6 +- ...pis__resource.k8s.io__v1beta2_openapi.json | 6 +- .../resource/v1/zz_generated.validations.go | 2 +- .../v1alpha3/zz_generated.conversion.go | 40 +- .../v1beta1/zz_generated.validations.go | 2 +- .../v1beta2/zz_generated.validations.go | 2 +- pkg/apis/resource/zz_generated.deepcopy.go | 36 +- pkg/generated/openapi/zz_generated.openapi.go | 119 +++-- .../k8s.io/api/resource/v1/generated.proto | 12 +- .../v1/types_swagger_doc_generated.go | 6 +- .../api/resource/v1alpha3/generated.pb.go | 304 +++++++----- .../api/resource/v1alpha3/generated.proto | 59 ++- 
.../v1alpha3/generated.protomessage.pb.go | 2 + .../v1alpha3/types_swagger_doc_generated.go | 24 +- .../v1alpha3/zz_generated.deepcopy.go | 37 +- .../v1alpha3/zz_generated.model_name.go | 5 + .../api/resource/v1beta1/generated.proto | 12 +- .../v1beta1/types_swagger_doc_generated.go | 6 +- .../api/resource/v1beta2/generated.proto | 12 +- .../v1beta2/types_swagger_doc_generated.go | 6 +- ...ource.k8s.io.v1alpha3.DeviceTaintRule.json | 22 +- ...esource.k8s.io.v1alpha3.DeviceTaintRule.pb | Bin 543 -> 566 bytes ...ource.k8s.io.v1alpha3.DeviceTaintRule.yaml | 12 +- ...lpha3.DeviceTaintRule.after_roundtrip.json | 60 +++ ...1alpha3.DeviceTaintRule.after_roundtrip.pb | Bin 0 -> 501 bytes ...lpha3.DeviceTaintRule.after_roundtrip.yaml | 45 ++ ...lpha3.DeviceTaintRule.after_roundtrip.json | 60 +++ ...1alpha3.DeviceTaintRule.after_roundtrip.pb | Bin 0 -> 501 bytes ...lpha3.DeviceTaintRule.after_roundtrip.yaml | 45 ++ .../applyconfigurations/internal/internal.go | 37 +- .../applyconfigurations/resource/v1/device.go | 4 +- .../resource/v1/devicetaint.go | 6 +- .../resource/v1/resourceslicespec.go | 2 +- .../resource/v1alpha3/celdeviceselector.go | 91 ---- .../resource/v1alpha3/deviceselector.go | 42 -- .../resource/v1alpha3/devicetaint.go | 6 +- .../resource/v1alpha3/devicetaintrule.go | 16 + .../resource/v1alpha3/devicetaintrulespec.go | 2 +- .../v1alpha3/devicetaintrulestatus.go | 70 +++ .../resource/v1alpha3/devicetaintselector.go | 29 -- .../resource/v1beta1/basicdevice.go | 4 +- .../resource/v1beta1/devicetaint.go | 6 +- .../resource/v1beta1/resourceslicespec.go | 2 +- .../resource/v1beta2/device.go | 4 +- .../resource/v1beta2/devicetaint.go | 6 +- .../resource/v1beta2/resourceslicespec.go | 2 +- .../client-go/applyconfigurations/utils.go | 4 - .../resource/v1alpha3/devicetaintrule.go | 4 + 53 files changed, 1468 insertions(+), 551 deletions(-) create mode 100644 staging/src/k8s.io/api/testdata/v1.33.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.json create 
mode 100644 staging/src/k8s.io/api/testdata/v1.33.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.pb create mode 100644 staging/src/k8s.io/api/testdata/v1.33.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.yaml create mode 100644 staging/src/k8s.io/api/testdata/v1.34.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.json create mode 100644 staging/src/k8s.io/api/testdata/v1.34.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.pb create mode 100644 staging/src/k8s.io/api/testdata/v1.34.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.yaml delete mode 100644 staging/src/k8s.io/client-go/applyconfigurations/resource/v1alpha3/celdeviceselector.go delete mode 100644 staging/src/k8s.io/client-go/applyconfigurations/resource/v1alpha3/deviceselector.go create mode 100644 staging/src/k8s.io/client-go/applyconfigurations/resource/v1alpha3/devicetaintrulestatus.go diff --git a/api/discovery/aggregated_v2.json b/api/discovery/aggregated_v2.json index 581cfcaf24a..0b16126fe47 100644 --- a/api/discovery/aggregated_v2.json +++ b/api/discovery/aggregated_v2.json @@ -2101,6 +2101,21 @@ }, "scope": "Cluster", "singularResource": "devicetaintrule", + "subresources": [ + { + "responseKind": { + "group": "", + "kind": "DeviceTaintRule", + "version": "" + }, + "subresource": "status", + "verbs": [ + "get", + "patch", + "update" + ] + } + ], "verbs": [ "create", "delete", diff --git a/api/discovery/apis__resource.k8s.io__v1alpha3.json b/api/discovery/apis__resource.k8s.io__v1alpha3.json index 70b14d2a677..9e91b4e383b 100644 --- a/api/discovery/apis__resource.k8s.io__v1alpha3.json +++ b/api/discovery/apis__resource.k8s.io__v1alpha3.json @@ -19,6 +19,17 @@ "update", "watch" ] + }, + { + "kind": "DeviceTaintRule", + "name": "devicetaintrules/status", + "namespaced": false, + "singularName": "", + "verbs": [ + "get", + "patch", + "update" + ] } ] } diff --git a/api/openapi-spec/swagger.json b/api/openapi-spec/swagger.json index 
32764b9a643..97c0cfeade2 100644 --- a/api/openapi-spec/swagger.json +++ b/api/openapi-spec/swagger.json @@ -15438,7 +15438,7 @@ "description": "NodeSelector defines the nodes where the device is available.\n\nMust use exactly one term.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set." }, "taints": { - "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", "items": { "$ref": "#/definitions/io.k8s.api.resource.v1.DeviceTaint" }, @@ -15885,7 +15885,7 @@ "description": "The device this taint is attached to has the \"effect\" on any claim which does not tolerate the taint and, through the claim, to pods using the claim.", "properties": { "effect": { - "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.", + "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. 
Consumers must treat unknown effects like None.", "type": "string" }, "key": { @@ -16349,7 +16349,7 @@ "type": "boolean" }, "devices": { - "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. If any device uses taints the limit is 64.", "items": { "$ref": "#/definitions/io.k8s.api.resource.v1.Device" }, @@ -16391,34 +16391,11 @@ ], "type": "object" }, - "io.k8s.api.resource.v1alpha3.CELDeviceSelector": { - "description": "CELDeviceSelector contains a CEL expression for selecting a device.", - "properties": { - "expression": { - "description": "Expression is a CEL expression which evaluates a single device. It must evaluate to true when the device under consideration satisfies the desired criteria, and false when it does not. Any other result is an error and causes allocation of devices to abort.\n\nThe expression's input is an object named \"device\", which carries the following properties:\n - driver (string): the name of the driver which defines this device.\n - attributes (map[string]object): the device's attributes, grouped by prefix\n (e.g. device.attributes[\"dra.example.com\"] evaluates to an object with all\n of the attributes which were prefixed by \"dra.example.com\".\n - capacity (map[string]object): the device's capacities, grouped by prefix.\n\nExample: Consider a device with driver=\"dra.example.com\", which exposes two attributes named \"model\" and \"ext.example.com/family\" and which exposes one capacity named \"modules\". This input to this expression would have the following fields:\n\n device.driver\n device.attributes[\"dra.example.com\"].model\n device.attributes[\"ext.example.com\"].family\n device.capacity[\"dra.example.com\"].modules\n\nThe device.driver field can be used to check for a specific driver, either as a high-level precondition (i.e. 
you only want to consider devices from this driver) or as part of a multi-clause expression that is meant to consider devices from different drivers.\n\nThe value type of each attribute is defined by the device definition, and users who write these expressions must consult the documentation for their specific drivers. The value type of each capacity is Quantity.\n\nIf an unknown prefix is used as a lookup in either device.attributes or device.capacity, an empty map will be returned. Any reference to an unknown field will cause an evaluation error and allocation to abort.\n\nA robust expression should check for the existence of attributes before referencing them.\n\nFor ease of use, the cel.bind() function is enabled, and can be used to simplify expressions that access multiple attributes with the same domain. For example:\n\n cel.bind(dra, device.attributes[\"dra.example.com\"], dra.someBool && dra.anotherBool)\n\nThe length of the expression must be smaller or equal to 10 Ki. The cost of evaluating it is also limited based on the estimated number of logical steps.", - "type": "string" - } - }, - "required": [ - "expression" - ], - "type": "object" - }, - "io.k8s.api.resource.v1alpha3.DeviceSelector": { - "description": "DeviceSelector must have exactly one field set.", - "properties": { - "cel": { - "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.CELDeviceSelector", - "description": "CEL contains a CEL expression for selecting a device." - } - }, - "type": "object" - }, "io.k8s.api.resource.v1alpha3.DeviceTaint": { "description": "The device this taint is attached to has the \"effect\" on any claim which does not tolerate the taint and, through the claim, to pods using the claim.", "properties": { "effect": { - "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. 
PreferNoSchedule as used for nodes is not valid here.", + "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "type": "string" }, "key": { @@ -16458,6 +16435,10 @@ "spec": { "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaintRuleSpec", "description": "Spec specifies the selector and one taint.\n\nChanging the spec automatically increments the metadata.generation number." + }, + "status": { + "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaintRuleStatus", + "description": "Status provides information about what was requested in the spec." } }, "required": [ @@ -16512,7 +16493,7 @@ "properties": { "deviceSelector": { "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaintSelector", - "description": "DeviceSelector defines which device(s) the taint is applied to. All selector criteria must be satified for a device to match. The empty selector matches all devices. Without a selector, no devices are matches." + "description": "DeviceSelector defines which device(s) the taint is applied to. All selector criteria must be satisfied for a device to match. The empty selector matches all devices. Without a selector, no devices are matches." 
}, "taint": { "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaint", @@ -16524,6 +16505,25 @@ ], "type": "object" }, + "io.k8s.api.resource.v1alpha3.DeviceTaintRuleStatus": { + "description": "DeviceTaintRuleStatus provides information about an on-going pod eviction.", + "properties": { + "conditions": { + "description": "Conditions provide information about the state of the DeviceTaintRule and the cluster at some point in time, in a machine-readable and human-readable format.\n\nThe following condition is currently defined as part of this API, more may get added: - Type: EvictionInProgress - Status: True if there are currently pods which need to be evicted, False otherwise\n (includes the effects which don't cause eviction).\n- Reason: not specified, may change - Message: includes information about number of pending pods and already evicted pods\n in a human-readable format, updated periodically, may change\n\nFor `effect: None`, the condition above gets set once for each change to the spec, with the message containing information about what would happen if the effect was `NoExecute`. This feedback can be used to decide whether changing the effect to `NoExecute` will work as intended. It only gets set once to avoid having to constantly update the status.\n\nMust have 8 or fewer entries.", + "items": { + "$ref": "#/definitions/io.k8s.apimachinery.pkg.apis.meta.v1.Condition" + }, + "type": "array", + "x-kubernetes-list-map-keys": [ + "type" + ], + "x-kubernetes-list-type": "map", + "x-kubernetes-patch-merge-key": "type", + "x-kubernetes-patch-strategy": "merge" + } + }, + "type": "object" + }, "io.k8s.api.resource.v1alpha3.DeviceTaintSelector": { "description": "DeviceTaintSelector defines which device(s) a DeviceTaintRule applies to. The empty selector matches all devices. Without a selector, no devices are matched.", "properties": { @@ -16531,10 +16531,6 @@ "description": "If device is set, only devices with that name are selected. 
This field corresponds to slice.spec.devices[].name.\n\nSetting also driver and pool may be required to avoid ambiguity, but is not required.", "type": "string" }, - "deviceClassName": { - "description": "If DeviceClassName is set, the selectors defined there must be satisfied by a device to be selected. This field corresponds to class.metadata.name.", - "type": "string" - }, "driver": { "description": "If driver is set, only devices from that driver are selected. This fields corresponds to slice.spec.driver.", "type": "string" @@ -16542,14 +16538,6 @@ "pool": { "description": "If pool is set, only devices in that pool are selected.\n\nAlso setting the driver name may be useful to avoid ambiguity when different drivers use the same pool name, but this is not required because selecting pools from different drivers may also be useful, for example when drivers with node-local devices use the node name as their pool name.", "type": "string" - }, - "selectors": { - "description": "Selectors contains the same selection criteria as a ResourceClaim. Currently, CEL expressions are supported. All of these selectors must be satisfied.", - "items": { - "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceSelector" - }, - "type": "array", - "x-kubernetes-list-type": "atomic" } }, "type": "object" @@ -16680,7 +16668,7 @@ "description": "NodeSelector defines the nodes where the device is available.\n\nMust use exactly one term.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set." }, "taints": { - "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. 
If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", "items": { "$ref": "#/definitions/io.k8s.api.resource.v1beta1.DeviceTaint" }, @@ -17277,7 +17265,7 @@ "description": "The device this taint is attached to has the \"effect\" on any claim which does not tolerate the taint and, through the claim, to pods using the claim.", "properties": { "effect": { - "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.", + "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "type": "string" }, "key": { @@ -17695,7 +17683,7 @@ "type": "boolean" }, "devices": { - "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. If any device uses taints the limit is 64.", "items": { "$ref": "#/definitions/io.k8s.api.resource.v1beta1.Device" }, @@ -17970,7 +17958,7 @@ "description": "NodeSelector defines the nodes where the device is available.\n\nMust use exactly one term.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set." 
}, "taints": { - "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", "items": { "$ref": "#/definitions/io.k8s.api.resource.v1beta2.DeviceTaint" }, @@ -18417,7 +18405,7 @@ "description": "The device this taint is attached to has the \"effect\" on any claim which does not tolerate the taint and, through the claim, to pods using the claim.", "properties": { "effect": { - "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.", + "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "type": "string" }, "key": { @@ -18881,7 +18869,7 @@ "type": "boolean" }, "devices": { - "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. 
If any device uses taints the limit is 64.", "items": { "$ref": "#/definitions/io.k8s.api.resource.v1beta2.Device" }, @@ -81346,6 +81334,197 @@ } } }, + "/apis/resource.k8s.io/v1alpha3/devicetaintrules/{name}/status": { + "get": { + "consumes": [ + "*/*" + ], + "description": "read status of the specified DeviceTaintRule", + "operationId": "readResourceV1alpha3DeviceTaintRuleStatus", + "produces": [ + "application/json", + "application/yaml", + "application/vnd.kubernetes.protobuf", + "application/cbor" + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "401": { + "description": "Unauthorized" + } + }, + "schemes": [ + "https" + ], + "tags": [ + "resource_v1alpha3" + ], + "x-kubernetes-action": "get", + "x-kubernetes-group-version-kind": { + "group": "resource.k8s.io", + "kind": "DeviceTaintRule", + "version": "v1alpha3" + } + }, + "parameters": [ + { + "description": "name of the DeviceTaintRule", + "in": "path", + "name": "name", + "required": true, + "type": "string", + "uniqueItems": true + }, + { + "$ref": "#/parameters/pretty-tJGM1-ng" + } + ], + "patch": { + "consumes": [ + "application/json-patch+json", + "application/merge-patch+json", + "application/strategic-merge-patch+json", + "application/apply-patch+yaml", + "application/apply-patch+cbor" + ], + "description": "partially update status of the specified DeviceTaintRule", + "operationId": "patchResourceV1alpha3DeviceTaintRuleStatus", + "parameters": [ + { + "$ref": "#/parameters/body-78PwaGsr" + }, + { + "description": "When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. 
Valid values are: - All: all dry run stages will be processed", + "in": "query", + "name": "dryRun", + "type": "string", + "uniqueItems": true + }, + { + "$ref": "#/parameters/fieldManager-7c6nTn1T" + }, + { + "description": "fieldValidation instructs the server on how to handle objects in the request (POST/PUT/PATCH) containing unknown or duplicate fields. Valid values are: - Ignore: This will ignore any unknown fields that are silently dropped from the object, and will ignore all but the last duplicate field that the decoder encounters. This is the default behavior prior to v1.23. - Warn: This will send a warning via the standard warning response header for each unknown field that is dropped from the object, and for each duplicate field that is encountered. The request will still succeed if there are no other errors, and will only persist the last of any duplicate fields. This is the default in v1.23+ - Strict: This will fail the request with a BadRequest error if any unknown fields would be dropped from the object, or if any duplicate fields are present. 
The error returned from the server will contain all unknown and duplicate fields encountered.", + "in": "query", + "name": "fieldValidation", + "type": "string", + "uniqueItems": true + }, + { + "$ref": "#/parameters/force-tOGGb0Yi" + } + ], + "produces": [ + "application/json", + "application/yaml", + "application/vnd.kubernetes.protobuf", + "application/cbor" + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "201": { + "description": "Created", + "schema": { + "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "401": { + "description": "Unauthorized" + } + }, + "schemes": [ + "https" + ], + "tags": [ + "resource_v1alpha3" + ], + "x-kubernetes-action": "patch", + "x-kubernetes-group-version-kind": { + "group": "resource.k8s.io", + "kind": "DeviceTaintRule", + "version": "v1alpha3" + } + }, + "put": { + "consumes": [ + "*/*" + ], + "description": "replace status of the specified DeviceTaintRule", + "operationId": "replaceResourceV1alpha3DeviceTaintRuleStatus", + "parameters": [ + { + "in": "body", + "name": "body", + "required": true, + "schema": { + "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + { + "description": "When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed", + "in": "query", + "name": "dryRun", + "type": "string", + "uniqueItems": true + }, + { + "$ref": "#/parameters/fieldManager-Qy4HdaTW" + }, + { + "description": "fieldValidation instructs the server on how to handle objects in the request (POST/PUT/PATCH) containing unknown or duplicate fields. 
Valid values are: - Ignore: This will ignore any unknown fields that are silently dropped from the object, and will ignore all but the last duplicate field that the decoder encounters. This is the default behavior prior to v1.23. - Warn: This will send a warning via the standard warning response header for each unknown field that is dropped from the object, and for each duplicate field that is encountered. The request will still succeed if there are no other errors, and will only persist the last of any duplicate fields. This is the default in v1.23+ - Strict: This will fail the request with a BadRequest error if any unknown fields would be dropped from the object, or if any duplicate fields are present. The error returned from the server will contain all unknown and duplicate fields encountered.", + "in": "query", + "name": "fieldValidation", + "type": "string", + "uniqueItems": true + } + ], + "produces": [ + "application/json", + "application/yaml", + "application/vnd.kubernetes.protobuf", + "application/cbor" + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "201": { + "description": "Created", + "schema": { + "$ref": "#/definitions/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "401": { + "description": "Unauthorized" + } + }, + "schemes": [ + "https" + ], + "tags": [ + "resource_v1alpha3" + ], + "x-kubernetes-action": "put", + "x-kubernetes-group-version-kind": { + "group": "resource.k8s.io", + "kind": "DeviceTaintRule", + "version": "v1alpha3" + } + } + }, "/apis/resource.k8s.io/v1alpha3/watch/devicetaintrules": { "get": { "consumes": [ diff --git a/api/openapi-spec/v3/apis__resource.k8s.io__v1_openapi.json b/api/openapi-spec/v3/apis__resource.k8s.io__v1_openapi.json index 5502b24745b..2bc7d12143c 100644 --- a/api/openapi-spec/v3/apis__resource.k8s.io__v1_openapi.json +++ b/api/openapi-spec/v3/apis__resource.k8s.io__v1_openapi.json @@ -401,7 +401,7 
@@ "description": "NodeSelector defines the nodes where the device is available.\n\nMust use exactly one term.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set." }, "taints": { - "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", "items": { "allOf": [ { @@ -980,7 +980,7 @@ "properties": { "effect": { "default": "", - "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.", + "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "type": "string" }, "key": { @@ -1571,7 +1571,7 @@ "type": "boolean" }, "devices": { - "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. 
If any device uses taints the limit is 64.", "items": { "allOf": [ { diff --git a/api/openapi-spec/v3/apis__resource.k8s.io__v1alpha3_openapi.json b/api/openapi-spec/v3/apis__resource.k8s.io__v1alpha3_openapi.json index f3b0d65a13d..1f20120ccae 100644 --- a/api/openapi-spec/v3/apis__resource.k8s.io__v1alpha3_openapi.json +++ b/api/openapi-spec/v3/apis__resource.k8s.io__v1alpha3_openapi.json @@ -1,40 +1,12 @@ { "components": { "schemas": { - "io.k8s.api.resource.v1alpha3.CELDeviceSelector": { - "description": "CELDeviceSelector contains a CEL expression for selecting a device.", - "properties": { - "expression": { - "default": "", - "description": "Expression is a CEL expression which evaluates a single device. It must evaluate to true when the device under consideration satisfies the desired criteria, and false when it does not. Any other result is an error and causes allocation of devices to abort.\n\nThe expression's input is an object named \"device\", which carries the following properties:\n - driver (string): the name of the driver which defines this device.\n - attributes (map[string]object): the device's attributes, grouped by prefix\n (e.g. device.attributes[\"dra.example.com\"] evaluates to an object with all\n of the attributes which were prefixed by \"dra.example.com\".\n - capacity (map[string]object): the device's capacities, grouped by prefix.\n\nExample: Consider a device with driver=\"dra.example.com\", which exposes two attributes named \"model\" and \"ext.example.com/family\" and which exposes one capacity named \"modules\". This input to this expression would have the following fields:\n\n device.driver\n device.attributes[\"dra.example.com\"].model\n device.attributes[\"ext.example.com\"].family\n device.capacity[\"dra.example.com\"].modules\n\nThe device.driver field can be used to check for a specific driver, either as a high-level precondition (i.e. 
you only want to consider devices from this driver) or as part of a multi-clause expression that is meant to consider devices from different drivers.\n\nThe value type of each attribute is defined by the device definition, and users who write these expressions must consult the documentation for their specific drivers. The value type of each capacity is Quantity.\n\nIf an unknown prefix is used as a lookup in either device.attributes or device.capacity, an empty map will be returned. Any reference to an unknown field will cause an evaluation error and allocation to abort.\n\nA robust expression should check for the existence of attributes before referencing them.\n\nFor ease of use, the cel.bind() function is enabled, and can be used to simplify expressions that access multiple attributes with the same domain. For example:\n\n cel.bind(dra, device.attributes[\"dra.example.com\"], dra.someBool && dra.anotherBool)\n\nThe length of the expression must be smaller or equal to 10 Ki. The cost of evaluating it is also limited based on the estimated number of logical steps.", - "type": "string" - } - }, - "required": [ - "expression" - ], - "type": "object" - }, - "io.k8s.api.resource.v1alpha3.DeviceSelector": { - "description": "DeviceSelector must have exactly one field set.", - "properties": { - "cel": { - "allOf": [ - { - "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.CELDeviceSelector" - } - ], - "description": "CEL contains a CEL expression for selecting a device." - } - }, - "type": "object" - }, "io.k8s.api.resource.v1alpha3.DeviceTaint": { "description": "The device this taint is attached to has the \"effect\" on any claim which does not tolerate the taint and, through the claim, to pods using the claim.", "properties": { "effect": { "default": "", - "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. 
PreferNoSchedule as used for nodes is not valid here.", + "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "type": "string" }, "key": { @@ -89,6 +61,15 @@ ], "default": {}, "description": "Spec specifies the selector and one taint.\n\nChanging the spec automatically increments the metadata.generation number." + }, + "status": { + "allOf": [ + { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRuleStatus" + } + ], + "default": {}, + "description": "Status provides information about what was requested in the spec." } }, "required": [ @@ -157,7 +138,7 @@ "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintSelector" } ], - "description": "DeviceSelector defines which device(s) the taint is applied to. All selector criteria must be satified for a device to match. The empty selector matches all devices. Without a selector, no devices are matches." + "description": "DeviceSelector defines which device(s) the taint is applied to. All selector criteria must be satisfied for a device to match. The empty selector matches all devices. Without a selector, no devices are matches." 
}, "taint": { "allOf": [ @@ -174,6 +155,30 @@ ], "type": "object" }, + "io.k8s.api.resource.v1alpha3.DeviceTaintRuleStatus": { + "description": "DeviceTaintRuleStatus provides information about an on-going pod eviction.", + "properties": { + "conditions": { + "description": "Conditions provide information about the state of the DeviceTaintRule and the cluster at some point in time, in a machine-readable and human-readable format.\n\nThe following condition is currently defined as part of this API, more may get added: - Type: EvictionInProgress - Status: True if there are currently pods which need to be evicted, False otherwise\n (includes the effects which don't cause eviction).\n- Reason: not specified, may change - Message: includes information about number of pending pods and already evicted pods\n in a human-readable format, updated periodically, may change\n\nFor `effect: None`, the condition above gets set once for each change to the spec, with the message containing information about what would happen if the effect was `NoExecute`. This feedback can be used to decide whether changing the effect to `NoExecute` will work as intended. It only gets set once to avoid having to constantly update the status.\n\nMust have 8 or fewer entries.", + "items": { + "allOf": [ + { + "$ref": "#/components/schemas/io.k8s.apimachinery.pkg.apis.meta.v1.Condition" + } + ], + "default": {} + }, + "type": "array", + "x-kubernetes-list-map-keys": [ + "type" + ], + "x-kubernetes-list-type": "map", + "x-kubernetes-patch-merge-key": "type", + "x-kubernetes-patch-strategy": "merge" + } + }, + "type": "object" + }, "io.k8s.api.resource.v1alpha3.DeviceTaintSelector": { "description": "DeviceTaintSelector defines which device(s) a DeviceTaintRule applies to. The empty selector matches all devices. Without a selector, no devices are matched.", "properties": { @@ -181,10 +186,6 @@ "description": "If device is set, only devices with that name are selected. 
This field corresponds to slice.spec.devices[].name.\n\nSetting also driver and pool may be required to avoid ambiguity, but is not required.", "type": "string" }, - "deviceClassName": { - "description": "If DeviceClassName is set, the selectors defined there must be satisfied by a device to be selected. This field corresponds to class.metadata.name.", - "type": "string" - }, "driver": { "description": "If driver is set, only devices from that driver are selected. This fields corresponds to slice.spec.driver.", "type": "string" @@ -192,19 +193,6 @@ "pool": { "description": "If pool is set, only devices in that pool are selected.\n\nAlso setting the driver name may be useful to avoid ambiguity when different drivers use the same pool name, but this is not required because selecting pools from different drivers may also be useful, for example when drivers with node-local devices use the node name as their pool name.", "type": "string" - }, - "selectors": { - "description": "Selectors contains the same selection criteria as a ResourceClaim. Currently, CEL expressions are supported. All of these selectors must be satisfied.", - "items": { - "allOf": [ - { - "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceSelector" - } - ], - "default": {} - }, - "type": "array", - "x-kubernetes-list-type": "atomic" } }, "type": "object" @@ -323,6 +311,52 @@ } ] }, + "io.k8s.apimachinery.pkg.apis.meta.v1.Condition": { + "description": "Condition contains details for one aspect of the current state of this API Resource.", + "properties": { + "lastTransitionTime": { + "allOf": [ + { + "$ref": "#/components/schemas/io.k8s.apimachinery.pkg.apis.meta.v1.Time" + } + ], + "description": "lastTransitionTime is the last time the condition transitioned from one status to another. This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable." 
+ }, + "message": { + "default": "", + "description": "message is a human readable message indicating details about the transition. This may be an empty string.", + "type": "string" + }, + "observedGeneration": { + "description": "observedGeneration represents the .metadata.generation that the condition was set based upon. For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date with respect to the current state of the instance.", + "format": "int64", + "type": "integer" + }, + "reason": { + "default": "", + "description": "reason contains a programmatic identifier indicating the reason for the condition's last transition. Producers of specific condition types may define expected values and meanings for this field, and whether the values are considered a guaranteed API. The value should be a CamelCase string. This field may not be empty.", + "type": "string" + }, + "status": { + "default": "", + "description": "status of the condition, one of True, False, Unknown.", + "type": "string" + }, + "type": { + "default": "", + "description": "type of condition in CamelCase or in foo.example.com/CamelCase.", + "type": "string" + } + }, + "required": [ + "type", + "status", + "lastTransitionTime", + "reason", + "message" + ], + "type": "object" + }, "io.k8s.apimachinery.pkg.apis.meta.v1.DeleteOptions": { "description": "DeleteOptions may be provided when deleting an API object.", "properties": { @@ -2334,6 +2368,315 @@ } } }, + "/apis/resource.k8s.io/v1alpha3/devicetaintrules/{name}/status": { + "get": { + "description": "read status of the specified DeviceTaintRule", + "operationId": "readResourceV1alpha3DeviceTaintRuleStatus", + "responses": { + "200": { + "content": { + "application/cbor": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/json": { + "schema": { + "$ref": 
"#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/vnd.kubernetes.protobuf": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/yaml": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + } + }, + "description": "OK" + }, + "401": { + "description": "Unauthorized" + } + }, + "tags": [ + "resource_v1alpha3" + ], + "x-kubernetes-action": "get", + "x-kubernetes-group-version-kind": { + "group": "resource.k8s.io", + "kind": "DeviceTaintRule", + "version": "v1alpha3" + } + }, + "parameters": [ + { + "description": "name of the DeviceTaintRule", + "in": "path", + "name": "name", + "required": true, + "schema": { + "type": "string", + "uniqueItems": true + } + }, + { + "description": "If 'true', then the output is pretty printed. Defaults to 'false' unless the user-agent indicates a browser or command-line HTTP tool (curl and wget).", + "in": "query", + "name": "pretty", + "schema": { + "type": "string", + "uniqueItems": true + } + } + ], + "patch": { + "description": "partially update status of the specified DeviceTaintRule", + "operationId": "patchResourceV1alpha3DeviceTaintRuleStatus", + "parameters": [ + { + "description": "When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed", + "in": "query", + "name": "dryRun", + "schema": { + "type": "string", + "uniqueItems": true + } + }, + { + "description": "fieldManager is a name associated with the actor or entity that is making these changes. The value must be less than or 128 characters long, and only contain printable characters, as defined by https://golang.org/pkg/unicode/#IsPrint. 
This field is required for apply requests (application/apply-patch) but optional for non-apply patch types (JsonPatch, MergePatch, StrategicMergePatch).", + "in": "query", + "name": "fieldManager", + "schema": { + "type": "string", + "uniqueItems": true + } + }, + { + "description": "fieldValidation instructs the server on how to handle objects in the request (POST/PUT/PATCH) containing unknown or duplicate fields. Valid values are: - Ignore: This will ignore any unknown fields that are silently dropped from the object, and will ignore all but the last duplicate field that the decoder encounters. This is the default behavior prior to v1.23. - Warn: This will send a warning via the standard warning response header for each unknown field that is dropped from the object, and for each duplicate field that is encountered. The request will still succeed if there are no other errors, and will only persist the last of any duplicate fields. This is the default in v1.23+ - Strict: This will fail the request with a BadRequest error if any unknown fields would be dropped from the object, or if any duplicate fields are present. The error returned from the server will contain all unknown and duplicate fields encountered.", + "in": "query", + "name": "fieldValidation", + "schema": { + "type": "string", + "uniqueItems": true + } + }, + { + "description": "Force is going to \"force\" Apply requests. It means user will re-acquire conflicting fields owned by other people. 
Force flag must be unset for non-apply patch requests.", + "in": "query", + "name": "force", + "schema": { + "type": "boolean", + "uniqueItems": true + } + } + ], + "requestBody": { + "content": { + "application/apply-patch+cbor": { + "schema": { + "$ref": "#/components/schemas/io.k8s.apimachinery.pkg.apis.meta.v1.Patch" + } + }, + "application/apply-patch+yaml": { + "schema": { + "$ref": "#/components/schemas/io.k8s.apimachinery.pkg.apis.meta.v1.Patch" + } + }, + "application/json-patch+json": { + "schema": { + "$ref": "#/components/schemas/io.k8s.apimachinery.pkg.apis.meta.v1.Patch" + } + }, + "application/merge-patch+json": { + "schema": { + "$ref": "#/components/schemas/io.k8s.apimachinery.pkg.apis.meta.v1.Patch" + } + }, + "application/strategic-merge-patch+json": { + "schema": { + "$ref": "#/components/schemas/io.k8s.apimachinery.pkg.apis.meta.v1.Patch" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/cbor": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/json": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/vnd.kubernetes.protobuf": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/yaml": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + } + }, + "description": "OK" + }, + "201": { + "content": { + "application/cbor": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/json": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/vnd.kubernetes.protobuf": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/yaml": { + "schema": { + "$ref": 
"#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + } + }, + "description": "Created" + }, + "401": { + "description": "Unauthorized" + } + }, + "tags": [ + "resource_v1alpha3" + ], + "x-kubernetes-action": "patch", + "x-kubernetes-group-version-kind": { + "group": "resource.k8s.io", + "kind": "DeviceTaintRule", + "version": "v1alpha3" + } + }, + "put": { + "description": "replace status of the specified DeviceTaintRule", + "operationId": "replaceResourceV1alpha3DeviceTaintRuleStatus", + "parameters": [ + { + "description": "When present, indicates that modifications should not be persisted. An invalid or unrecognized dryRun directive will result in an error response and no further processing of the request. Valid values are: - All: all dry run stages will be processed", + "in": "query", + "name": "dryRun", + "schema": { + "type": "string", + "uniqueItems": true + } + }, + { + "description": "fieldManager is a name associated with the actor or entity that is making these changes. The value must be less than or 128 characters long, and only contain printable characters, as defined by https://golang.org/pkg/unicode/#IsPrint.", + "in": "query", + "name": "fieldManager", + "schema": { + "type": "string", + "uniqueItems": true + } + }, + { + "description": "fieldValidation instructs the server on how to handle objects in the request (POST/PUT/PATCH) containing unknown or duplicate fields. Valid values are: - Ignore: This will ignore any unknown fields that are silently dropped from the object, and will ignore all but the last duplicate field that the decoder encounters. This is the default behavior prior to v1.23. - Warn: This will send a warning via the standard warning response header for each unknown field that is dropped from the object, and for each duplicate field that is encountered. The request will still succeed if there are no other errors, and will only persist the last of any duplicate fields. 
This is the default in v1.23+ - Strict: This will fail the request with a BadRequest error if any unknown fields would be dropped from the object, or if any duplicate fields are present. The error returned from the server will contain all unknown and duplicate fields encountered.", + "in": "query", + "name": "fieldValidation", + "schema": { + "type": "string", + "uniqueItems": true + } + } + ], + "requestBody": { + "content": { + "*/*": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + } + }, + "required": true + }, + "responses": { + "200": { + "content": { + "application/cbor": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/json": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/vnd.kubernetes.protobuf": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/yaml": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + } + }, + "description": "OK" + }, + "201": { + "content": { + "application/cbor": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/json": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/vnd.kubernetes.protobuf": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + }, + "application/yaml": { + "schema": { + "$ref": "#/components/schemas/io.k8s.api.resource.v1alpha3.DeviceTaintRule" + } + } + }, + "description": "Created" + }, + "401": { + "description": "Unauthorized" + } + }, + "tags": [ + "resource_v1alpha3" + ], + "x-kubernetes-action": "put", + "x-kubernetes-group-version-kind": { + "group": "resource.k8s.io", + "kind": "DeviceTaintRule", + "version": "v1alpha3" + } + } 
+ }, "/apis/resource.k8s.io/v1alpha3/watch/devicetaintrules": { "get": { "description": "watch individual changes to a list of DeviceTaintRule. deprecated: use the 'watch' parameter with a list operation instead.", diff --git a/api/openapi-spec/v3/apis__resource.k8s.io__v1beta1_openapi.json b/api/openapi-spec/v3/apis__resource.k8s.io__v1beta1_openapi.json index 21d7445632e..0c202d70fa0 100644 --- a/api/openapi-spec/v3/apis__resource.k8s.io__v1beta1_openapi.json +++ b/api/openapi-spec/v3/apis__resource.k8s.io__v1beta1_openapi.json @@ -262,7 +262,7 @@ "description": "NodeSelector defines the nodes where the device is available.\n\nMust use exactly one term.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set." }, "taints": { - "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", "items": { "allOf": [ { @@ -1038,7 +1038,7 @@ "properties": { "effect": { "default": "", - "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.", + "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. 
Consumers must treat unknown effects like None.", "type": "string" }, "key": { @@ -1568,7 +1568,7 @@ "type": "boolean" }, "devices": { - "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. If any device uses taints the limit is 64.", "items": { "allOf": [ { diff --git a/api/openapi-spec/v3/apis__resource.k8s.io__v1beta2_openapi.json b/api/openapi-spec/v3/apis__resource.k8s.io__v1beta2_openapi.json index c8165784b90..9a9185e8f45 100644 --- a/api/openapi-spec/v3/apis__resource.k8s.io__v1beta2_openapi.json +++ b/api/openapi-spec/v3/apis__resource.k8s.io__v1beta2_openapi.json @@ -401,7 +401,7 @@ "description": "NodeSelector defines the nodes where the device is available.\n\nMust use exactly one term.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set." }, "taints": { - "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + "description": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", "items": { "allOf": [ { @@ -980,7 +980,7 @@ "properties": { "effect": { "default": "", - "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. 
PreferNoSchedule as used for nodes is not valid here.", + "description": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "type": "string" }, "key": { @@ -1571,7 +1571,7 @@ "type": "boolean" }, "devices": { - "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + "description": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. If any device uses taints the limit is 64.", "items": { "allOf": [ { diff --git a/pkg/apis/resource/v1/zz_generated.validations.go b/pkg/apis/resource/v1/zz_generated.validations.go index d07eb637a75..165210cea78 100644 --- a/pkg/apis/resource/v1/zz_generated.validations.go +++ b/pkg/apis/resource/v1/zz_generated.validations.go @@ -1188,7 +1188,7 @@ func Validate_DeviceTaint(ctx context.Context, op operation.Operation, fldPath * return errs } -var symbolsForDeviceTaintEffect = sets.New(resourcev1.DeviceTaintEffectNoExecute, resourcev1.DeviceTaintEffectNoSchedule) +var symbolsForDeviceTaintEffect = sets.New(resourcev1.DeviceTaintEffectNoExecute, resourcev1.DeviceTaintEffectNoSchedule, resourcev1.DeviceTaintEffectNone) // Validate_DeviceTaintEffect validates an instance of DeviceTaintEffect according // to declarative validation rules in the API schema. 
diff --git a/pkg/apis/resource/v1alpha3/zz_generated.conversion.go b/pkg/apis/resource/v1alpha3/zz_generated.conversion.go index aac8aca97cf..e5c6ee21cf9 100644 --- a/pkg/apis/resource/v1alpha3/zz_generated.conversion.go +++ b/pkg/apis/resource/v1alpha3/zz_generated.conversion.go @@ -98,6 +98,16 @@ func RegisterConversions(s *runtime.Scheme) error { }); err != nil { return err } + if err := s.AddGeneratedConversionFunc((*resourcev1alpha3.DeviceTaintRuleStatus)(nil), (*resource.DeviceTaintRuleStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha3_DeviceTaintRuleStatus_To_resource_DeviceTaintRuleStatus(a.(*resourcev1alpha3.DeviceTaintRuleStatus), b.(*resource.DeviceTaintRuleStatus), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*resource.DeviceTaintRuleStatus)(nil), (*resourcev1alpha3.DeviceTaintRuleStatus)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_resource_DeviceTaintRuleStatus_To_v1alpha3_DeviceTaintRuleStatus(a.(*resource.DeviceTaintRuleStatus), b.(*resourcev1alpha3.DeviceTaintRuleStatus), scope) + }); err != nil { + return err + } if err := s.AddGeneratedConversionFunc((*resourcev1alpha3.DeviceTaintSelector)(nil), (*resource.DeviceTaintSelector)(nil), func(a, b interface{}, scope conversion.Scope) error { return Convert_v1alpha3_DeviceTaintSelector_To_resource_DeviceTaintSelector(a.(*resourcev1alpha3.DeviceTaintSelector), b.(*resource.DeviceTaintSelector), scope) }); err != nil { @@ -182,6 +192,9 @@ func autoConvert_v1alpha3_DeviceTaintRule_To_resource_DeviceTaintRule(in *resour if err := Convert_v1alpha3_DeviceTaintRuleSpec_To_resource_DeviceTaintRuleSpec(&in.Spec, &out.Spec, s); err != nil { return err } + if err := Convert_v1alpha3_DeviceTaintRuleStatus_To_resource_DeviceTaintRuleStatus(&in.Status, &out.Status, s); err != nil { + return err + } return nil } @@ -195,6 +208,9 @@ func 
autoConvert_resource_DeviceTaintRule_To_v1alpha3_DeviceTaintRule(in *resour if err := Convert_resource_DeviceTaintRuleSpec_To_v1alpha3_DeviceTaintRuleSpec(&in.Spec, &out.Spec, s); err != nil { return err } + if err := Convert_resource_DeviceTaintRuleStatus_To_v1alpha3_DeviceTaintRuleStatus(&in.Status, &out.Status, s); err != nil { + return err + } return nil } @@ -251,12 +267,30 @@ func Convert_resource_DeviceTaintRuleSpec_To_v1alpha3_DeviceTaintRuleSpec(in *re return autoConvert_resource_DeviceTaintRuleSpec_To_v1alpha3_DeviceTaintRuleSpec(in, out, s) } +func autoConvert_v1alpha3_DeviceTaintRuleStatus_To_resource_DeviceTaintRuleStatus(in *resourcev1alpha3.DeviceTaintRuleStatus, out *resource.DeviceTaintRuleStatus, s conversion.Scope) error { + out.Conditions = *(*[]v1.Condition)(unsafe.Pointer(&in.Conditions)) + return nil +} + +// Convert_v1alpha3_DeviceTaintRuleStatus_To_resource_DeviceTaintRuleStatus is an autogenerated conversion function. +func Convert_v1alpha3_DeviceTaintRuleStatus_To_resource_DeviceTaintRuleStatus(in *resourcev1alpha3.DeviceTaintRuleStatus, out *resource.DeviceTaintRuleStatus, s conversion.Scope) error { + return autoConvert_v1alpha3_DeviceTaintRuleStatus_To_resource_DeviceTaintRuleStatus(in, out, s) +} + +func autoConvert_resource_DeviceTaintRuleStatus_To_v1alpha3_DeviceTaintRuleStatus(in *resource.DeviceTaintRuleStatus, out *resourcev1alpha3.DeviceTaintRuleStatus, s conversion.Scope) error { + out.Conditions = *(*[]v1.Condition)(unsafe.Pointer(&in.Conditions)) + return nil +} + +// Convert_resource_DeviceTaintRuleStatus_To_v1alpha3_DeviceTaintRuleStatus is an autogenerated conversion function. 
+func Convert_resource_DeviceTaintRuleStatus_To_v1alpha3_DeviceTaintRuleStatus(in *resource.DeviceTaintRuleStatus, out *resourcev1alpha3.DeviceTaintRuleStatus, s conversion.Scope) error { + return autoConvert_resource_DeviceTaintRuleStatus_To_v1alpha3_DeviceTaintRuleStatus(in, out, s) +} + func autoConvert_v1alpha3_DeviceTaintSelector_To_resource_DeviceTaintSelector(in *resourcev1alpha3.DeviceTaintSelector, out *resource.DeviceTaintSelector, s conversion.Scope) error { - out.DeviceClassName = (*string)(unsafe.Pointer(in.DeviceClassName)) out.Driver = (*string)(unsafe.Pointer(in.Driver)) out.Pool = (*string)(unsafe.Pointer(in.Pool)) out.Device = (*string)(unsafe.Pointer(in.Device)) - out.Selectors = *(*[]resource.DeviceSelector)(unsafe.Pointer(&in.Selectors)) return nil } @@ -266,11 +300,9 @@ func Convert_v1alpha3_DeviceTaintSelector_To_resource_DeviceTaintSelector(in *re } func autoConvert_resource_DeviceTaintSelector_To_v1alpha3_DeviceTaintSelector(in *resource.DeviceTaintSelector, out *resourcev1alpha3.DeviceTaintSelector, s conversion.Scope) error { - out.DeviceClassName = (*string)(unsafe.Pointer(in.DeviceClassName)) out.Driver = (*string)(unsafe.Pointer(in.Driver)) out.Pool = (*string)(unsafe.Pointer(in.Pool)) out.Device = (*string)(unsafe.Pointer(in.Device)) - out.Selectors = *(*[]resourcev1alpha3.DeviceSelector)(unsafe.Pointer(&in.Selectors)) return nil } diff --git a/pkg/apis/resource/v1beta1/zz_generated.validations.go b/pkg/apis/resource/v1beta1/zz_generated.validations.go index 313dee6e527..230495e953b 100644 --- a/pkg/apis/resource/v1beta1/zz_generated.validations.go +++ b/pkg/apis/resource/v1beta1/zz_generated.validations.go @@ -1279,7 +1279,7 @@ func Validate_DeviceTaint(ctx context.Context, op operation.Operation, fldPath * return errs } -var symbolsForDeviceTaintEffect = sets.New(resourcev1beta1.DeviceTaintEffectNoExecute, resourcev1beta1.DeviceTaintEffectNoSchedule) +var symbolsForDeviceTaintEffect = 
sets.New(resourcev1beta1.DeviceTaintEffectNoExecute, resourcev1beta1.DeviceTaintEffectNoSchedule, resourcev1beta1.DeviceTaintEffectNone) // Validate_DeviceTaintEffect validates an instance of DeviceTaintEffect according // to declarative validation rules in the API schema. diff --git a/pkg/apis/resource/v1beta2/zz_generated.validations.go b/pkg/apis/resource/v1beta2/zz_generated.validations.go index c853199d228..ee45bd41b6c 100644 --- a/pkg/apis/resource/v1beta2/zz_generated.validations.go +++ b/pkg/apis/resource/v1beta2/zz_generated.validations.go @@ -1212,7 +1212,7 @@ func Validate_DeviceTaint(ctx context.Context, op operation.Operation, fldPath * return errs } -var symbolsForDeviceTaintEffect = sets.New(resourcev1beta2.DeviceTaintEffectNoExecute, resourcev1beta2.DeviceTaintEffectNoSchedule) +var symbolsForDeviceTaintEffect = sets.New(resourcev1beta2.DeviceTaintEffectNoExecute, resourcev1beta2.DeviceTaintEffectNoSchedule, resourcev1beta2.DeviceTaintEffectNone) // Validate_DeviceTaintEffect validates an instance of DeviceTaintEffect according // to declarative validation rules in the API schema. diff --git a/pkg/apis/resource/zz_generated.deepcopy.go b/pkg/apis/resource/zz_generated.deepcopy.go index 937d5a5b35a..1b869906e05 100644 --- a/pkg/apis/resource/zz_generated.deepcopy.go +++ b/pkg/apis/resource/zz_generated.deepcopy.go @@ -831,6 +831,7 @@ func (in *DeviceTaintRule) DeepCopyInto(out *DeviceTaintRule) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) return } @@ -907,14 +908,32 @@ func (in *DeviceTaintRuleSpec) DeepCopy() *DeviceTaintRuleSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *DeviceTaintRuleStatus) DeepCopyInto(out *DeviceTaintRuleStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeviceTaintRuleStatus. +func (in *DeviceTaintRuleStatus) DeepCopy() *DeviceTaintRuleStatus { + if in == nil { + return nil + } + out := new(DeviceTaintRuleStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *DeviceTaintSelector) DeepCopyInto(out *DeviceTaintSelector) { *out = *in - if in.DeviceClassName != nil { - in, out := &in.DeviceClassName, &out.DeviceClassName - *out = new(string) - **out = **in - } if in.Driver != nil { in, out := &in.Driver, &out.Driver *out = new(string) @@ -930,13 +949,6 @@ func (in *DeviceTaintSelector) DeepCopyInto(out *DeviceTaintSelector) { *out = new(string) **out = **in } - if in.Selectors != nil { - in, out := &in.Selectors, &out.Selectors - *out = make([]DeviceSelector, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } return } diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index 55e56242cc8..b9775bcf6d2 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -1065,6 +1065,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA v1alpha3.DeviceTaintRule{}.OpenAPIModelName(): schema_k8sio_api_resource_v1alpha3_DeviceTaintRule(ref), v1alpha3.DeviceTaintRuleList{}.OpenAPIModelName(): schema_k8sio_api_resource_v1alpha3_DeviceTaintRuleList(ref), v1alpha3.DeviceTaintRuleSpec{}.OpenAPIModelName(): schema_k8sio_api_resource_v1alpha3_DeviceTaintRuleSpec(ref), + 
v1alpha3.DeviceTaintRuleStatus{}.OpenAPIModelName(): schema_k8sio_api_resource_v1alpha3_DeviceTaintRuleStatus(ref), v1alpha3.DeviceTaintSelector{}.OpenAPIModelName(): schema_k8sio_api_resource_v1alpha3_DeviceTaintSelector(ref), resourcev1beta1.AllocatedDeviceStatus{}.OpenAPIModelName(): schema_k8sio_api_resource_v1beta1_AllocatedDeviceStatus(ref), resourcev1beta1.AllocationResult{}.OpenAPIModelName(): schema_k8sio_api_resource_v1beta1_AllocationResult(ref), @@ -48375,7 +48376,7 @@ func schema_k8sio_api_resource_v1_Device(ref common.ReferenceCallback) common.Op }, }, SchemaProps: spec.SchemaProps{ - Description: "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + Description: "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", Type: []string{"array"}, Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ @@ -49339,11 +49340,11 @@ func schema_k8sio_api_resource_v1_DeviceTaint(ref common.ReferenceCallback) comm }, "effect": { SchemaProps: spec.SchemaProps{ - Description: "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. 
PreferNoSchedule as used for nodes is not valid here.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.", + Description: "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.\n - `\"None\"` No effect, the taint is purely informational.", Default: "", Type: []string{"string"}, Format: "", - Enum: []interface{}{"NoExecute", "NoSchedule"}, + Enum: []interface{}{"NoExecute", "NoSchedule", "None"}, }, }, "timeAdded": { @@ -49393,10 +49394,10 @@ func schema_k8sio_api_resource_v1_DeviceToleration(ref common.ReferenceCallback) }, "effect": { SchemaProps: spec.SchemaProps{ - Description: "Effect indicates the taint effect to match. Empty means match all taint effects. 
When specified, allowed values are NoSchedule and NoExecute.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.", + Description: "Effect indicates the taint effect to match. Empty means match all taint effects. When specified, allowed values are NoSchedule and NoExecute.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.\n - `\"None\"` No effect, the taint is purely informational.", Type: []string{"string"}, Format: "", - Enum: []interface{}{"NoExecute", "NoSchedule"}, + Enum: []interface{}{"NoExecute", "NoSchedule", "None"}, }, }, "tolerationSeconds": { @@ -50125,7 +50126,7 @@ func schema_k8sio_api_resource_v1_ResourceSliceSpec(ref common.ReferenceCallback }, }, SchemaProps: spec.SchemaProps{ - Description: "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + Description: "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. If any device uses taints the limit is 64.", Type: []string{"array"}, Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ @@ -50239,11 +50240,11 @@ func schema_k8sio_api_resource_v1alpha3_DeviceTaint(ref common.ReferenceCallback }, "effect": { SchemaProps: spec.SchemaProps{ - Description: "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. 
Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.", + Description: "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.\n - `\"None\"` No effect, the taint is purely informational.", Default: "", Type: []string{"string"}, Format: "", - Enum: []interface{}{"NoExecute", "NoSchedule"}, + Enum: []interface{}{"NoExecute", "NoSchedule", "None"}, }, }, "timeAdded": { @@ -50296,12 +50297,19 @@ func schema_k8sio_api_resource_v1alpha3_DeviceTaintRule(ref common.ReferenceCall Ref: ref(v1alpha3.DeviceTaintRuleSpec{}.OpenAPIModelName()), }, }, + "status": { + SchemaProps: spec.SchemaProps{ + Description: "Status provides information about what was requested in the spec.", + Default: map[string]interface{}{}, + Ref: ref(v1alpha3.DeviceTaintRuleStatus{}.OpenAPIModelName()), + }, + }, }, Required: []string{"spec"}, }, }, Dependencies: []string{ - v1alpha3.DeviceTaintRuleSpec{}.OpenAPIModelName(), metav1.ObjectMeta{}.OpenAPIModelName()}, + 
v1alpha3.DeviceTaintRuleSpec{}.OpenAPIModelName(), v1alpha3.DeviceTaintRuleStatus{}.OpenAPIModelName(), metav1.ObjectMeta{}.OpenAPIModelName()}, } } @@ -50365,7 +50373,7 @@ func schema_k8sio_api_resource_v1alpha3_DeviceTaintRuleSpec(ref common.Reference Properties: map[string]spec.Schema{ "deviceSelector": { SchemaProps: spec.SchemaProps{ - Description: "DeviceSelector defines which device(s) the taint is applied to. All selector criteria must be satified for a device to match. The empty selector matches all devices. Without a selector, no devices are matches.", + Description: "DeviceSelector defines which device(s) the taint is applied to. All selector criteria must be satisfied for a device to match. The empty selector matches all devices. Without a selector, no devices are matches.", Ref: ref(v1alpha3.DeviceTaintSelector{}.OpenAPIModelName()), }, }, @@ -50385,6 +50393,45 @@ func schema_k8sio_api_resource_v1alpha3_DeviceTaintRuleSpec(ref common.Reference } } +func schema_k8sio_api_resource_v1alpha3_DeviceTaintRuleStatus(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "DeviceTaintRuleStatus provides information about an on-going pod eviction.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "conditions": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-map-keys": []interface{}{ + "type", + }, + "x-kubernetes-list-type": "map", + "x-kubernetes-patch-merge-key": "type", + "x-kubernetes-patch-strategy": "merge", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "Conditions provide information about the state of the DeviceTaintRule and the cluster at some point in time, in a machine-readable and human-readable format.\n\nThe following condition is currently defined as part of this API, more may get added: - Type: EvictionInProgress - Status: True if there are currently pods which 
need to be evicted, False otherwise\n (includes the effects which don't cause eviction).\n- Reason: not specified, may change - Message: includes information about number of pending pods and already evicted pods\n in a human-readable format, updated periodically, may change\n\nFor `effect: None`, the condition above gets set once for each change to the spec, with the message containing information about what would happen if the effect was `NoExecute`. This feedback can be used to decide whether changing the effect to `NoExecute` will work as intended. It only gets set once to avoid having to constantly update the status.\n\nMust have 8 or fewer entries.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: map[string]interface{}{}, + Ref: ref(metav1.Condition{}.OpenAPIModelName()), + }, + }, + }, + }, + }, + }, + }, + }, + Dependencies: []string{ + metav1.Condition{}.OpenAPIModelName()}, + } +} + func schema_k8sio_api_resource_v1alpha3_DeviceTaintSelector(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -50392,13 +50439,6 @@ func schema_k8sio_api_resource_v1alpha3_DeviceTaintSelector(ref common.Reference Description: "DeviceTaintSelector defines which device(s) a DeviceTaintRule applies to. The empty selector matches all devices. Without a selector, no devices are matched.", Type: []string{"object"}, Properties: map[string]spec.Schema{ - "deviceClassName": { - SchemaProps: spec.SchemaProps{ - Description: "If DeviceClassName is set, the selectors defined there must be satisfied by a device to be selected. This field corresponds to class.metadata.name.", - Type: []string{"string"}, - Format: "", - }, - }, "driver": { SchemaProps: spec.SchemaProps{ Description: "If driver is set, only devices from that driver are selected. 
This fields corresponds to slice.spec.driver.", @@ -50420,30 +50460,9 @@ func schema_k8sio_api_resource_v1alpha3_DeviceTaintSelector(ref common.Reference Format: "", }, }, - "selectors": { - VendorExtensible: spec.VendorExtensible{ - Extensions: spec.Extensions{ - "x-kubernetes-list-type": "atomic", - }, - }, - SchemaProps: spec.SchemaProps{ - Description: "Selectors contains the same selection criteria as a ResourceClaim. Currently, CEL expressions are supported. All of these selectors must be satisfied.", - Type: []string{"array"}, - Items: &spec.SchemaOrArray{ - Schema: &spec.Schema{ - SchemaProps: spec.SchemaProps{ - Default: map[string]interface{}{}, - Ref: ref(v1alpha3.DeviceSelector{}.OpenAPIModelName()), - }, - }, - }, - }, - }, }, }, }, - Dependencies: []string{ - v1alpha3.DeviceSelector{}.OpenAPIModelName()}, } } @@ -50645,7 +50664,7 @@ func schema_k8sio_api_resource_v1beta1_BasicDevice(ref common.ReferenceCallback) }, }, SchemaProps: spec.SchemaProps{ - Description: "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + Description: "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", Type: []string{"array"}, Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ @@ -51897,11 +51916,11 @@ func schema_k8sio_api_resource_v1beta1_DeviceTaint(ref common.ReferenceCallback) }, "effect": { SchemaProps: spec.SchemaProps{ - Description: "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. 
PreferNoSchedule as used for nodes is not valid here.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.", + Description: "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.\n - `\"None\"` No effect, the taint is purely informational.", Default: "", Type: []string{"string"}, Format: "", - Enum: []interface{}{"NoExecute", "NoSchedule"}, + Enum: []interface{}{"NoExecute", "NoSchedule", "None"}, }, }, "timeAdded": { @@ -51951,10 +51970,10 @@ func schema_k8sio_api_resource_v1beta1_DeviceToleration(ref common.ReferenceCall }, "effect": { SchemaProps: spec.SchemaProps{ - Description: "Effect indicates the taint effect to match. Empty means match all taint effects. 
When specified, allowed values are NoSchedule and NoExecute.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.", + Description: "Effect indicates the taint effect to match. Empty means match all taint effects. When specified, allowed values are NoSchedule and NoExecute.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.\n - `\"None\"` No effect, the taint is purely informational.", Type: []string{"string"}, Format: "", - Enum: []interface{}{"NoExecute", "NoSchedule"}, + Enum: []interface{}{"NoExecute", "NoSchedule", "None"}, }, }, "tolerationSeconds": { @@ -52593,7 +52612,7 @@ func schema_k8sio_api_resource_v1beta1_ResourceSliceSpec(ref common.ReferenceCal }, }, SchemaProps: spec.SchemaProps{ - Description: "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + Description: "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. 
If any device uses taints the limit is 64.", Type: []string{"array"}, Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ @@ -53037,7 +53056,7 @@ func schema_k8sio_api_resource_v1beta2_Device(ref common.ReferenceCallback) comm }, }, SchemaProps: spec.SchemaProps{ - Description: "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + Description: "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", Type: []string{"array"}, Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ @@ -54001,11 +54020,11 @@ func schema_k8sio_api_resource_v1beta2_DeviceTaint(ref common.ReferenceCallback) }, "effect": { SchemaProps: spec.SchemaProps{ - Description: "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.", + Description: "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. 
Consumers must treat unknown effects like None.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.\n - `\"None\"` No effect, the taint is purely informational.", Default: "", Type: []string{"string"}, Format: "", - Enum: []interface{}{"NoExecute", "NoSchedule"}, + Enum: []interface{}{"NoExecute", "NoSchedule", "None"}, }, }, "timeAdded": { @@ -54055,10 +54074,10 @@ func schema_k8sio_api_resource_v1beta2_DeviceToleration(ref common.ReferenceCall }, "effect": { SchemaProps: spec.SchemaProps{ - Description: "Effect indicates the taint effect to match. Empty means match all taint effects. When specified, allowed values are NoSchedule and NoExecute.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.", + Description: "Effect indicates the taint effect to match. Empty means match all taint effects. 
When specified, allowed values are NoSchedule and NoExecute.\n\n\nPossible enum values:\n - `\"NoExecute\"` Evict any already-running pods that do not tolerate the device taint.\n - `\"NoSchedule\"` Do not allow new pods to schedule which use a tainted device unless they tolerate the taint, but allow all pods submitted to Kubelet without going through the scheduler to start, and allow all already-running pods to continue running.\n - `\"None\"` No effect, the taint is purely informational.", Type: []string{"string"}, Format: "", - Enum: []interface{}{"NoExecute", "NoSchedule"}, + Enum: []interface{}{"NoExecute", "NoSchedule", "None"}, }, }, "tolerationSeconds": { @@ -54787,7 +54806,7 @@ func schema_k8sio_api_resource_v1beta2_ResourceSliceSpec(ref common.ReferenceCal }, }, SchemaProps: spec.SchemaProps{ - Description: "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + Description: "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. If any device uses taints the limit is 64.", Type: []string{"array"}, Items: &spec.SchemaOrArray{ Schema: &spec.Schema{ diff --git a/staging/src/k8s.io/api/resource/v1/generated.proto b/staging/src/k8s.io/api/resource/v1/generated.proto index 2e716e19459..8d8b7d19fa3 100644 --- a/staging/src/k8s.io/api/resource/v1/generated.proto +++ b/staging/src/k8s.io/api/resource/v1/generated.proto @@ -395,7 +395,9 @@ message Device { // If specified, these are the driver-defined taints. // - // The maximum number of taints is 4. + // The maximum number of taints is 16. If taints are set for + // any device in a ResourceSlice, then the maximum number of + // allowed devices per ResourceSlice is 64 instead of 128. // // This is an alpha field and requires enabling the DRADeviceTaints // feature gate. @@ -1103,8 +1105,10 @@ message DeviceTaint { // The effect of the taint on claims that do not tolerate the taint // and through such claims on the pods using them. 
- // Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for - // nodes is not valid here. + // + // Valid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for + // nodes is not valid here. More effects may get added in the future. + // Consumers must treat unknown effects like None. // // +required // +k8s:required @@ -1640,7 +1644,7 @@ message ResourceSliceSpec { // Devices lists some or all of the devices in this pool. // - // Must not have more than 128 entries. + // Must not have more than 128 entries. If any device uses taints the limit is 64. // // +optional // +listType=atomic diff --git a/staging/src/k8s.io/api/resource/v1/types_swagger_doc_generated.go b/staging/src/k8s.io/api/resource/v1/types_swagger_doc_generated.go index c37f64393fe..4e64b0d6b57 100644 --- a/staging/src/k8s.io/api/resource/v1/types_swagger_doc_generated.go +++ b/staging/src/k8s.io/api/resource/v1/types_swagger_doc_generated.go @@ -121,7 +121,7 @@ var map_Device = map[string]string{ "nodeName": "NodeName identifies the node where the device is available.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set.", "nodeSelector": "NodeSelector defines the nodes where the device is available.\n\nMust use exactly one term.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set.", "allNodes": "AllNodes indicates that all nodes have access to the device.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set.", - "taints": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + "taints": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. 
If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", "bindsToNode": "BindsToNode indicates if the usage of an allocation involving this device has to be limited to exactly the node that was chosen when allocating the claim. If set to true, the scheduler will set the ResourceClaim.Status.Allocation.NodeSelector to match the node where the allocation was made.\n\nThis is an alpha field and requires enabling the DRADeviceBindingConditions and DRAResourceClaimDeviceStatus feature gates.", "bindingConditions": "BindingConditions defines the conditions for proceeding with binding. All of these conditions must be set in the per-device status conditions with a value of True to proceed with binding the pod to the node while scheduling the pod.\n\nThe maximum number of binding conditions is 4.\n\nThe conditions must be a valid condition type string.\n\nThis is an alpha field and requires enabling the DRADeviceBindingConditions and DRAResourceClaimDeviceStatus feature gates.", "bindingFailureConditions": "BindingFailureConditions defines the conditions for binding failure. They may be set in the per-device status conditions. If any is set to \"True\", a binding failure occurred.\n\nThe maximum number of binding failure conditions is 4.\n\nThe conditions must be a valid condition type string.\n\nThis is an alpha field and requires enabling the DRADeviceBindingConditions and DRAResourceClaimDeviceStatus feature gates.", @@ -320,7 +320,7 @@ var map_DeviceTaint = map[string]string{ "": "The device this taint is attached to has the \"effect\" on any claim which does not tolerate the taint and, through the claim, to pods using the claim.", "key": "The taint key to be applied to a device. Must be a label name.", "value": "The taint value corresponding to the taint key. 
Must be a label value.", - "effect": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.", + "effect": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "timeAdded": "TimeAdded represents the time at which the taint was added. Added automatically during create or update if not set.", } @@ -498,7 +498,7 @@ var map_ResourceSliceSpec = map[string]string{ "nodeName": "NodeName identifies the node which provides the resources in this pool. A field selector can be used to list only ResourceSlice objects belonging to a certain node.\n\nThis field can be used to limit access from nodes to ResourceSlices with the same node name. It also indicates to autoscalers that adding new nodes of the same type as some old node might also make new resources available.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set. This field is immutable.", "nodeSelector": "NodeSelector defines which nodes have access to the resources in the pool, when that pool is not limited to a single node.\n\nMust use exactly one term.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set.", "allNodes": "AllNodes indicates that all nodes have access to the resources in the pool.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set.", - "devices": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + "devices": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. 
If any device uses taints the limit is 64.", "perDeviceNodeSelection": "PerDeviceNodeSelection defines whether the access from nodes to resources in the pool is set on the ResourceSlice level or on each device. If it is set to true, every device defined the ResourceSlice must specify this individually.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set.", "sharedCounters": "SharedCounters defines a list of counter sets, each of which has a name and a list of counters available.\n\nThe names of the SharedCounters must be unique in the ResourceSlice.\n\nThe maximum number of counters in all sets is 32.", } diff --git a/staging/src/k8s.io/api/resource/v1alpha3/generated.pb.go b/staging/src/k8s.io/api/resource/v1alpha3/generated.pb.go index 826af237b98..66b9f8f6203 100644 --- a/staging/src/k8s.io/api/resource/v1alpha3/generated.pb.go +++ b/staging/src/k8s.io/api/resource/v1alpha3/generated.pb.go @@ -43,6 +43,8 @@ func (m *DeviceTaintRuleList) Reset() { *m = DeviceTaintRuleList{} } func (m *DeviceTaintRuleSpec) Reset() { *m = DeviceTaintRuleSpec{} } +func (m *DeviceTaintRuleStatus) Reset() { *m = DeviceTaintRuleStatus{} } + func (m *DeviceTaintSelector) Reset() { *m = DeviceTaintSelector{} } func (m *CELDeviceSelector) Marshal() (dAtA []byte, err error) { @@ -178,6 +180,16 @@ func (m *DeviceTaintRule) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + { + size, err := m.Status.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintGenerated(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x1a { size, err := m.Spec.MarshalToSizedBuffer(dAtA[:i]) if err != nil { @@ -293,6 +305,43 @@ func (m *DeviceTaintRuleSpec) MarshalToSizedBuffer(dAtA []byte) (int, error) { return len(dAtA) - i, nil } +func (m *DeviceTaintRuleStatus) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + 
return nil, err + } + return dAtA[:n], nil +} + +func (m *DeviceTaintRuleStatus) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *DeviceTaintRuleStatus) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if len(m.Conditions) > 0 { + for iNdEx := len(m.Conditions) - 1; iNdEx >= 0; iNdEx-- { + { + size, err := m.Conditions[iNdEx].MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintGenerated(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0xa + } + } + return len(dAtA) - i, nil +} + func (m *DeviceTaintSelector) Marshal() (dAtA []byte, err error) { size := m.Size() dAtA = make([]byte, size) @@ -313,20 +362,6 @@ func (m *DeviceTaintSelector) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l - if len(m.Selectors) > 0 { - for iNdEx := len(m.Selectors) - 1; iNdEx >= 0; iNdEx-- { - { - size, err := m.Selectors[iNdEx].MarshalToSizedBuffer(dAtA[:i]) - if err != nil { - return 0, err - } - i -= size - i = encodeVarintGenerated(dAtA, i, uint64(size)) - } - i-- - dAtA[i] = 0x2a - } - } if m.Device != nil { i -= len(*m.Device) copy(dAtA[i:], *m.Device) @@ -348,13 +383,6 @@ func (m *DeviceTaintSelector) MarshalToSizedBuffer(dAtA []byte) (int, error) { i-- dAtA[i] = 0x12 } - if m.DeviceClassName != nil { - i -= len(*m.DeviceClassName) - copy(dAtA[i:], *m.DeviceClassName) - i = encodeVarintGenerated(dAtA, i, uint64(len(*m.DeviceClassName))) - i-- - dAtA[i] = 0xa - } return len(dAtA) - i, nil } @@ -422,6 +450,8 @@ func (m *DeviceTaintRule) Size() (n int) { n += 1 + l + sovGenerated(uint64(l)) l = m.Spec.Size() n += 1 + l + sovGenerated(uint64(l)) + l = m.Status.Size() + n += 1 + l + sovGenerated(uint64(l)) return n } @@ -457,16 +487,27 @@ func (m *DeviceTaintRuleSpec) Size() (n int) { return n } +func (m *DeviceTaintRuleStatus) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + if 
len(m.Conditions) > 0 { + for _, e := range m.Conditions { + l = e.Size() + n += 1 + l + sovGenerated(uint64(l)) + } + } + return n +} + func (m *DeviceTaintSelector) Size() (n int) { if m == nil { return 0 } var l int _ = l - if m.DeviceClassName != nil { - l = len(*m.DeviceClassName) - n += 1 + l + sovGenerated(uint64(l)) - } if m.Driver != nil { l = len(*m.Driver) n += 1 + l + sovGenerated(uint64(l)) @@ -479,12 +520,6 @@ func (m *DeviceTaintSelector) Size() (n int) { l = len(*m.Device) n += 1 + l + sovGenerated(uint64(l)) } - if len(m.Selectors) > 0 { - for _, e := range m.Selectors { - l = e.Size() - n += 1 + l + sovGenerated(uint64(l)) - } - } return n } @@ -521,6 +556,7 @@ func (this *DeviceTaintRule) String() string { s := strings.Join([]string{`&DeviceTaintRule{`, `ObjectMeta:` + strings.Replace(strings.Replace(fmt.Sprintf("%v", this.ObjectMeta), "ObjectMeta", "v1.ObjectMeta", 1), `&`, ``, 1) + `,`, `Spec:` + strings.Replace(strings.Replace(this.Spec.String(), "DeviceTaintRuleSpec", "DeviceTaintRuleSpec", 1), `&`, ``, 1) + `,`, + `Status:` + strings.Replace(strings.Replace(this.Status.String(), "DeviceTaintRuleStatus", "DeviceTaintRuleStatus", 1), `&`, ``, 1) + `,`, `}`, }, "") return s @@ -552,21 +588,29 @@ func (this *DeviceTaintRuleSpec) String() string { }, "") return s } +func (this *DeviceTaintRuleStatus) String() string { + if this == nil { + return "nil" + } + repeatedStringForConditions := "[]Condition{" + for _, f := range this.Conditions { + repeatedStringForConditions += fmt.Sprintf("%v", f) + "," + } + repeatedStringForConditions += "}" + s := strings.Join([]string{`&DeviceTaintRuleStatus{`, + `Conditions:` + repeatedStringForConditions + `,`, + `}`, + }, "") + return s +} func (this *DeviceTaintSelector) String() string { if this == nil { return "nil" } - repeatedStringForSelectors := "[]DeviceSelector{" - for _, f := range this.Selectors { - repeatedStringForSelectors += strings.Replace(strings.Replace(f.String(), "DeviceSelector", 
"DeviceSelector", 1), `&`, ``, 1) + "," - } - repeatedStringForSelectors += "}" s := strings.Join([]string{`&DeviceTaintSelector{`, - `DeviceClassName:` + valueToStringGenerated(this.DeviceClassName) + `,`, `Driver:` + valueToStringGenerated(this.Driver) + `,`, `Pool:` + valueToStringGenerated(this.Pool) + `,`, `Device:` + valueToStringGenerated(this.Device) + `,`, - `Selectors:` + repeatedStringForSelectors + `,`, `}`, }, "") return s @@ -1024,6 +1068,39 @@ func (m *DeviceTaintRule) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Status", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowGenerated + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthGenerated + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthGenerated + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.Status.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipGenerated(dAtA[iNdEx:]) @@ -1281,6 +1358,90 @@ func (m *DeviceTaintRuleSpec) Unmarshal(dAtA []byte) error { } return nil } +func (m *DeviceTaintRuleStatus) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowGenerated + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: DeviceTaintRuleStatus: wiretype end group for non-group") + } + if fieldNum <= 0 { + return 
fmt.Errorf("proto: DeviceTaintRuleStatus: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Conditions", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowGenerated + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthGenerated + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthGenerated + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Conditions = append(m.Conditions, v1.Condition{}) + if err := m.Conditions[len(m.Conditions)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipGenerated(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return ErrInvalidLengthGenerated + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} func (m *DeviceTaintSelector) Unmarshal(dAtA []byte) error { l := len(dAtA) iNdEx := 0 @@ -1310,39 +1471,6 @@ func (m *DeviceTaintSelector) Unmarshal(dAtA []byte) error { return fmt.Errorf("proto: DeviceTaintSelector: illegal tag %d (wire type %d)", fieldNum, wire) } switch fieldNum { - case 1: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field DeviceClassName", wireType) - } - var stringLen uint64 - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowGenerated - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - stringLen |= uint64(b&0x7F) << shift - if b < 0x80 { - break - } - } - intStringLen := int(stringLen) - if intStringLen < 0 { - return ErrInvalidLengthGenerated - } - 
postIndex := iNdEx + intStringLen - if postIndex < 0 { - return ErrInvalidLengthGenerated - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - s := string(dAtA[iNdEx:postIndex]) - m.DeviceClassName = &s - iNdEx = postIndex case 2: if wireType != 2 { return fmt.Errorf("proto: wrong wireType = %d for field Driver", wireType) @@ -1442,40 +1570,6 @@ func (m *DeviceTaintSelector) Unmarshal(dAtA []byte) error { s := string(dAtA[iNdEx:postIndex]) m.Device = &s iNdEx = postIndex - case 5: - if wireType != 2 { - return fmt.Errorf("proto: wrong wireType = %d for field Selectors", wireType) - } - var msglen int - for shift := uint(0); ; shift += 7 { - if shift >= 64 { - return ErrIntOverflowGenerated - } - if iNdEx >= l { - return io.ErrUnexpectedEOF - } - b := dAtA[iNdEx] - iNdEx++ - msglen |= int(b&0x7F) << shift - if b < 0x80 { - break - } - } - if msglen < 0 { - return ErrInvalidLengthGenerated - } - postIndex := iNdEx + msglen - if postIndex < 0 { - return ErrInvalidLengthGenerated - } - if postIndex > l { - return io.ErrUnexpectedEOF - } - m.Selectors = append(m.Selectors, DeviceSelector{}) - if err := m.Selectors[len(m.Selectors)-1].Unmarshal(dAtA[iNdEx:postIndex]); err != nil { - return err - } - iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipGenerated(dAtA[iNdEx:]) diff --git a/staging/src/k8s.io/api/resource/v1alpha3/generated.proto b/staging/src/k8s.io/api/resource/v1alpha3/generated.proto index d334479007e..6414216db29 100644 --- a/staging/src/k8s.io/api/resource/v1alpha3/generated.proto +++ b/staging/src/k8s.io/api/resource/v1alpha3/generated.proto @@ -114,8 +114,10 @@ message DeviceTaint { // The effect of the taint on claims that do not tolerate the taint // and through such claims on the pods using them. - // Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for - // nodes is not valid here. + // + // Valid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for + // nodes is not valid here. 
More effects may get added in the future. + // Consumers must treat unknown effects like None. // // +required optional string effect = 3; @@ -139,6 +141,11 @@ message DeviceTaintRule { // // Changing the spec automatically increments the metadata.generation number. optional DeviceTaintRuleSpec spec = 2; + + // Status provides information about what was requested in the spec. + // + // +optional + optional DeviceTaintRuleStatus status = 3; } // DeviceTaintRuleList is a collection of DeviceTaintRules. @@ -154,7 +161,7 @@ message DeviceTaintRuleList { // DeviceTaintRuleSpec specifies the selector and one taint. message DeviceTaintRuleSpec { // DeviceSelector defines which device(s) the taint is applied to. - // All selector criteria must be satified for a device to + // All selector criteria must be satisfied for a device to // match. The empty selector matches all devices. Without // a selector, no devices are matches. // @@ -167,17 +174,41 @@ message DeviceTaintRuleSpec { optional DeviceTaint taint = 2; } +// DeviceTaintRuleStatus provides information about an on-going pod eviction. +message DeviceTaintRuleStatus { + // Conditions provide information about the state of the DeviceTaintRule + // and the cluster at some point in time, + // in a machine-readable and human-readable format. + // + // The following condition is currently defined as part of this API, more may + // get added: + // - Type: EvictionInProgress + // - Status: True if there are currently pods which need to be evicted, False otherwise + // (includes the effects which don't cause eviction). + // - Reason: not specified, may change + // - Message: includes information about number of pending pods and already evicted pods + // in a human-readable format, updated periodically, may change + // + // For `effect: None`, the condition above gets set once for each change to + // the spec, with the message containing information about what would happen + // if the effect was `NoExecute`. 
This feedback can be used to decide whether + // changing the effect to `NoExecute` will work as intended. It only gets + // set once to avoid having to constantly update the status. + // + // Must have 8 or fewer entries. + // + // +optional + // +listType=map + // +listMapKey=type + // +patchStrategy=merge + // +patchMergeKey=type + repeated .k8s.io.apimachinery.pkg.apis.meta.v1.Condition conditions = 1; +} + // DeviceTaintSelector defines which device(s) a DeviceTaintRule applies to. // The empty selector matches all devices. Without a selector, no devices // are matched. message DeviceTaintSelector { - // If DeviceClassName is set, the selectors defined there must be - // satisfied by a device to be selected. This field corresponds - // to class.metadata.name. - // - // +optional - optional string deviceClassName = 1; - // If driver is set, only devices from that driver are selected. // This fields corresponds to slice.spec.driver. // @@ -204,13 +235,5 @@ message DeviceTaintSelector { // // +optional optional string device = 4; - - // Selectors contains the same selection criteria as a ResourceClaim. - // Currently, CEL expressions are supported. All of these selectors - // must be satisfied. 
- // - // +optional - // +listType=atomic - repeated DeviceSelector selectors = 5; } diff --git a/staging/src/k8s.io/api/resource/v1alpha3/generated.protomessage.pb.go b/staging/src/k8s.io/api/resource/v1alpha3/generated.protomessage.pb.go index 2c6196bb862..aba6231f3d3 100644 --- a/staging/src/k8s.io/api/resource/v1alpha3/generated.protomessage.pb.go +++ b/staging/src/k8s.io/api/resource/v1alpha3/generated.protomessage.pb.go @@ -33,4 +33,6 @@ func (*DeviceTaintRuleList) ProtoMessage() {} func (*DeviceTaintRuleSpec) ProtoMessage() {} +func (*DeviceTaintRuleStatus) ProtoMessage() {} + func (*DeviceTaintSelector) ProtoMessage() {} diff --git a/staging/src/k8s.io/api/resource/v1alpha3/types_swagger_doc_generated.go b/staging/src/k8s.io/api/resource/v1alpha3/types_swagger_doc_generated.go index 6c4c4eb1b11..30981bd7e60 100644 --- a/staging/src/k8s.io/api/resource/v1alpha3/types_swagger_doc_generated.go +++ b/staging/src/k8s.io/api/resource/v1alpha3/types_swagger_doc_generated.go @@ -49,7 +49,7 @@ var map_DeviceTaint = map[string]string{ "": "The device this taint is attached to has the \"effect\" on any claim which does not tolerate the taint and, through the claim, to pods using the claim.", "key": "The taint key to be applied to a device. Must be a label name.", "value": "The taint value corresponding to the taint key. Must be a label value.", - "effect": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.", + "effect": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "timeAdded": "TimeAdded represents the time at which the taint was added. 
Added automatically during create or update if not set.", } @@ -61,6 +61,7 @@ var map_DeviceTaintRule = map[string]string{ "": "DeviceTaintRule adds one taint to all devices which match the selector. This has the same effect as if the taint was specified directly in the ResourceSlice by the DRA driver.", "metadata": "Standard object metadata", "spec": "Spec specifies the selector and one taint.\n\nChanging the spec automatically increments the metadata.generation number.", + "status": "Status provides information about what was requested in the spec.", } func (DeviceTaintRule) SwaggerDoc() map[string]string { @@ -79,7 +80,7 @@ func (DeviceTaintRuleList) SwaggerDoc() map[string]string { var map_DeviceTaintRuleSpec = map[string]string{ "": "DeviceTaintRuleSpec specifies the selector and one taint.", - "deviceSelector": "DeviceSelector defines which device(s) the taint is applied to. All selector criteria must be satified for a device to match. The empty selector matches all devices. Without a selector, no devices are matches.", + "deviceSelector": "DeviceSelector defines which device(s) the taint is applied to. All selector criteria must be satisfied for a device to match. The empty selector matches all devices. 
Without a selector, no devices are matches.", "taint": "The taint that gets applied to matching devices.", } @@ -87,13 +88,20 @@ func (DeviceTaintRuleSpec) SwaggerDoc() map[string]string { return map_DeviceTaintRuleSpec } +var map_DeviceTaintRuleStatus = map[string]string{ + "": "DeviceTaintRuleStatus provides information about an on-going pod eviction.", + "conditions": "Conditions provide information about the state of the DeviceTaintRule and the cluster at some point in time, in a machine-readable and human-readable format.\n\nThe following condition is currently defined as part of this API, more may get added: - Type: EvictionInProgress - Status: True if there are currently pods which need to be evicted, False otherwise\n (includes the effects which don't cause eviction).\n- Reason: not specified, may change - Message: includes information about number of pending pods and already evicted pods\n in a human-readable format, updated periodically, may change\n\nFor `effect: None`, the condition above gets set once for each change to the spec, with the message containing information about what would happen if the effect was `NoExecute`. This feedback can be used to decide whether changing the effect to `NoExecute` will work as intended. It only gets set once to avoid having to constantly update the status.\n\nMust have 8 or fewer entries.", +} + +func (DeviceTaintRuleStatus) SwaggerDoc() map[string]string { + return map_DeviceTaintRuleStatus +} + var map_DeviceTaintSelector = map[string]string{ - "": "DeviceTaintSelector defines which device(s) a DeviceTaintRule applies to. The empty selector matches all devices. Without a selector, no devices are matched.", - "deviceClassName": "If DeviceClassName is set, the selectors defined there must be satisfied by a device to be selected. This field corresponds to class.metadata.name.", - "driver": "If driver is set, only devices from that driver are selected. 
This fields corresponds to slice.spec.driver.", - "pool": "If pool is set, only devices in that pool are selected.\n\nAlso setting the driver name may be useful to avoid ambiguity when different drivers use the same pool name, but this is not required because selecting pools from different drivers may also be useful, for example when drivers with node-local devices use the node name as their pool name.", - "device": "If device is set, only devices with that name are selected. This field corresponds to slice.spec.devices[].name.\n\nSetting also driver and pool may be required to avoid ambiguity, but is not required.", - "selectors": "Selectors contains the same selection criteria as a ResourceClaim. Currently, CEL expressions are supported. All of these selectors must be satisfied.", + "": "DeviceTaintSelector defines which device(s) a DeviceTaintRule applies to. The empty selector matches all devices. Without a selector, no devices are matched.", + "driver": "If driver is set, only devices from that driver are selected. This fields corresponds to slice.spec.driver.", + "pool": "If pool is set, only devices in that pool are selected.\n\nAlso setting the driver name may be useful to avoid ambiguity when different drivers use the same pool name, but this is not required because selecting pools from different drivers may also be useful, for example when drivers with node-local devices use the node name as their pool name.", + "device": "If device is set, only devices with that name are selected. 
This field corresponds to slice.spec.devices[].name.\n\nSetting also driver and pool may be required to avoid ambiguity, but is not required.", } func (DeviceTaintSelector) SwaggerDoc() map[string]string { diff --git a/staging/src/k8s.io/api/resource/v1alpha3/zz_generated.deepcopy.go b/staging/src/k8s.io/api/resource/v1alpha3/zz_generated.deepcopy.go index e10736b97e4..6813ab043c6 100644 --- a/staging/src/k8s.io/api/resource/v1alpha3/zz_generated.deepcopy.go +++ b/staging/src/k8s.io/api/resource/v1alpha3/zz_generated.deepcopy.go @@ -22,6 +22,7 @@ limitations under the License. package v1alpha3 import ( + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -88,6 +89,7 @@ func (in *DeviceTaintRule) DeepCopyInto(out *DeviceTaintRule) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) return } @@ -164,14 +166,32 @@ func (in *DeviceTaintRuleSpec) DeepCopy() *DeviceTaintRuleSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeviceTaintRuleStatus) DeepCopyInto(out *DeviceTaintRuleStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeviceTaintRuleStatus. +func (in *DeviceTaintRuleStatus) DeepCopy() *DeviceTaintRuleStatus { + if in == nil { + return nil + } + out := new(DeviceTaintRuleStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *DeviceTaintSelector) DeepCopyInto(out *DeviceTaintSelector) { *out = *in - if in.DeviceClassName != nil { - in, out := &in.DeviceClassName, &out.DeviceClassName - *out = new(string) - **out = **in - } if in.Driver != nil { in, out := &in.Driver, &out.Driver *out = new(string) @@ -187,13 +207,6 @@ func (in *DeviceTaintSelector) DeepCopyInto(out *DeviceTaintSelector) { *out = new(string) **out = **in } - if in.Selectors != nil { - in, out := &in.Selectors, &out.Selectors - *out = make([]DeviceSelector, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } return } diff --git a/staging/src/k8s.io/api/resource/v1alpha3/zz_generated.model_name.go b/staging/src/k8s.io/api/resource/v1alpha3/zz_generated.model_name.go index fecb50dd70a..1c1672b4d61 100644 --- a/staging/src/k8s.io/api/resource/v1alpha3/zz_generated.model_name.go +++ b/staging/src/k8s.io/api/resource/v1alpha3/zz_generated.model_name.go @@ -51,6 +51,11 @@ func (in DeviceTaintRuleSpec) OpenAPIModelName() string { return "io.k8s.api.resource.v1alpha3.DeviceTaintRuleSpec" } +// OpenAPIModelName returns the OpenAPI model name for this type. +func (in DeviceTaintRuleStatus) OpenAPIModelName() string { + return "io.k8s.api.resource.v1alpha3.DeviceTaintRuleStatus" +} + // OpenAPIModelName returns the OpenAPI model name for this type. func (in DeviceTaintSelector) OpenAPIModelName() string { return "io.k8s.api.resource.v1alpha3.DeviceTaintSelector" diff --git a/staging/src/k8s.io/api/resource/v1beta1/generated.proto b/staging/src/k8s.io/api/resource/v1beta1/generated.proto index e7e39be818b..762ca668e1c 100644 --- a/staging/src/k8s.io/api/resource/v1beta1/generated.proto +++ b/staging/src/k8s.io/api/resource/v1beta1/generated.proto @@ -185,7 +185,9 @@ message BasicDevice { // If specified, these are the driver-defined taints. // - // The maximum number of taints is 4. + // The maximum number of taints is 16. 
If taints are set for + // any device in a ResourceSlice, then the maximum number of + // allowed devices per ResourceSlice is 64 instead of 128. // // This is an alpha field and requires enabling the DRADeviceTaints // feature gate. @@ -1227,8 +1229,10 @@ message DeviceTaint { // The effect of the taint on claims that do not tolerate the taint // and through such claims on the pods using them. - // Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for - // nodes is not valid here. + // + // Valid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for + // nodes is not valid here. More effects may get added in the future. + // Consumers must treat unknown effects like None. // // +required // +k8s:required @@ -1654,7 +1658,7 @@ message ResourceSliceSpec { // Devices lists some or all of the devices in this pool. // - // Must not have more than 128 entries. + // Must not have more than 128 entries. If any device uses taints the limit is 64. // // +optional // +listType=atomic diff --git a/staging/src/k8s.io/api/resource/v1beta1/types_swagger_doc_generated.go b/staging/src/k8s.io/api/resource/v1beta1/types_swagger_doc_generated.go index bfddc82d9d6..5db51c05c45 100644 --- a/staging/src/k8s.io/api/resource/v1beta1/types_swagger_doc_generated.go +++ b/staging/src/k8s.io/api/resource/v1beta1/types_swagger_doc_generated.go @@ -61,7 +61,7 @@ var map_BasicDevice = map[string]string{ "nodeName": "NodeName identifies the node where the device is available.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set.", "nodeSelector": "NodeSelector defines the nodes where the device is available.\n\nMust use exactly one term.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. 
At most one of NodeName, NodeSelector and AllNodes can be set.", "allNodes": "AllNodes indicates that all nodes have access to the device.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set.", - "taints": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + "taints": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", "bindsToNode": "BindsToNode indicates if the usage of an allocation involving this device has to be limited to exactly the node that was chosen when allocating the claim. If set to true, the scheduler will set the ResourceClaim.Status.Allocation.NodeSelector to match the node where the allocation was made.\n\nThis is an alpha field and requires enabling the DRADeviceBindingConditions and DRAResourceClaimDeviceStatus feature gates.", "bindingConditions": "BindingConditions defines the conditions for proceeding with binding. All of these conditions must be set in the per-device status conditions with a value of True to proceed with binding the pod to the node while scheduling the pod.\n\nThe maximum number of binding conditions is 4.\n\nThe conditions must be a valid condition type string.\n\nThis is an alpha field and requires enabling the DRADeviceBindingConditions and DRAResourceClaimDeviceStatus feature gates.", "bindingFailureConditions": "BindingFailureConditions defines the conditions for binding failure. They may be set in the per-device status conditions. 
If any is true, a binding failure occurred.\n\nThe maximum number of binding failure conditions is 4.\n\nThe conditions must be a valid condition type string.\n\nThis is an alpha field and requires enabling the DRADeviceBindingConditions and DRAResourceClaimDeviceStatus feature gates.", @@ -335,7 +335,7 @@ var map_DeviceTaint = map[string]string{ "": "The device this taint is attached to has the \"effect\" on any claim which does not tolerate the taint and, through the claim, to pods using the claim.", "key": "The taint key to be applied to a device. Must be a label name.", "value": "The taint value corresponding to the taint key. Must be a label value.", - "effect": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.", + "effect": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "timeAdded": "TimeAdded represents the time at which the taint was added. Added automatically during create or update if not set.", } @@ -498,7 +498,7 @@ var map_ResourceSliceSpec = map[string]string{ "nodeName": "NodeName identifies the node which provides the resources in this pool. A field selector can be used to list only ResourceSlice objects belonging to a certain node.\n\nThis field can be used to limit access from nodes to ResourceSlices with the same node name. It also indicates to autoscalers that adding new nodes of the same type as some old node might also make new resources available.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set. 
This field is immutable.", "nodeSelector": "NodeSelector defines which nodes have access to the resources in the pool, when that pool is not limited to a single node.\n\nMust use exactly one term.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set.", "allNodes": "AllNodes indicates that all nodes have access to the resources in the pool.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set.", - "devices": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + "devices": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. If any device uses taints the limit is 64.", "perDeviceNodeSelection": "PerDeviceNodeSelection defines whether the access from nodes to resources in the pool is set on the ResourceSlice level or on each device. If it is set to true, every device defined the ResourceSlice must specify this individually.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set.", "sharedCounters": "SharedCounters defines a list of counter sets, each of which has a name and a list of counters available.\n\nThe names of the SharedCounters must be unique in the ResourceSlice.\n\nThe maximum number of SharedCounters is 32.", } diff --git a/staging/src/k8s.io/api/resource/v1beta2/generated.proto b/staging/src/k8s.io/api/resource/v1beta2/generated.proto index cacbdf942e0..f03218e0968 100644 --- a/staging/src/k8s.io/api/resource/v1beta2/generated.proto +++ b/staging/src/k8s.io/api/resource/v1beta2/generated.proto @@ -395,7 +395,9 @@ message Device { // If specified, these are the driver-defined taints. // - // The maximum number of taints is 4. + // The maximum number of taints is 16. If taints are set for + // any device in a ResourceSlice, then the maximum number of + // allowed devices per ResourceSlice is 64 instead of 128. 
// // This is an alpha field and requires enabling the DRADeviceTaints // feature gate. @@ -1103,8 +1105,10 @@ message DeviceTaint { // The effect of the taint on claims that do not tolerate the taint // and through such claims on the pods using them. - // Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for - // nodes is not valid here. + // + // Valid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for + // nodes is not valid here. More effects may get added in the future. + // Consumers must treat unknown effects like None. // // +required // +k8s:required @@ -1640,7 +1644,7 @@ message ResourceSliceSpec { // Devices lists some or all of the devices in this pool. // - // Must not have more than 128 entries. + // Must not have more than 128 entries. If any device uses taints the limit is 64. // // +optional // +listType=atomic diff --git a/staging/src/k8s.io/api/resource/v1beta2/types_swagger_doc_generated.go b/staging/src/k8s.io/api/resource/v1beta2/types_swagger_doc_generated.go index 1b2ac954b0a..c1592925e62 100644 --- a/staging/src/k8s.io/api/resource/v1beta2/types_swagger_doc_generated.go +++ b/staging/src/k8s.io/api/resource/v1beta2/types_swagger_doc_generated.go @@ -121,7 +121,7 @@ var map_Device = map[string]string{ "nodeName": "NodeName identifies the node where the device is available.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set.", "nodeSelector": "NodeSelector defines the nodes where the device is available.\n\nMust use exactly one term.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. At most one of NodeName, NodeSelector and AllNodes can be set.", "allNodes": "AllNodes indicates that all nodes have access to the device.\n\nMust only be set if Spec.PerDeviceNodeSelection is set to true. 
At most one of NodeName, NodeSelector and AllNodes can be set.", - "taints": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 4.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", + "taints": "If specified, these are the driver-defined taints.\n\nThe maximum number of taints is 16. If taints are set for any device in a ResourceSlice, then the maximum number of allowed devices per ResourceSlice is 64 instead of 128.\n\nThis is an alpha field and requires enabling the DRADeviceTaints feature gate.", "bindsToNode": "BindsToNode indicates if the usage of an allocation involving this device has to be limited to exactly the node that was chosen when allocating the claim. If set to true, the scheduler will set the ResourceClaim.Status.Allocation.NodeSelector to match the node where the allocation was made.\n\nThis is an alpha field and requires enabling the DRADeviceBindingConditions and DRAResourceClaimDeviceStatus feature gates.", "bindingConditions": "BindingConditions defines the conditions for proceeding with binding. All of these conditions must be set in the per-device status conditions with a value of True to proceed with binding the pod to the node while scheduling the pod.\n\nThe maximum number of binding conditions is 4.\n\nThe conditions must be a valid condition type string.\n\nThis is an alpha field and requires enabling the DRADeviceBindingConditions and DRAResourceClaimDeviceStatus feature gates.", "bindingFailureConditions": "BindingFailureConditions defines the conditions for binding failure. They may be set in the per-device status conditions. 
If any is set to \"True\", a binding failure occurred.\n\nThe maximum number of binding failure conditions is 4.\n\nThe conditions must be a valid condition type string.\n\nThis is an alpha field and requires enabling the DRADeviceBindingConditions and DRAResourceClaimDeviceStatus feature gates.", @@ -320,7 +320,7 @@ var map_DeviceTaint = map[string]string{ "": "The device this taint is attached to has the \"effect\" on any claim which does not tolerate the taint and, through the claim, to pods using the claim.", "key": "The taint key to be applied to a device. Must be a label name.", "value": "The taint value corresponding to the taint key. Must be a label value.", - "effect": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them. Valid effects are NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here.", + "effect": "The effect of the taint on claims that do not tolerate the taint and through such claims on the pods using them.\n\nValid effects are None, NoSchedule and NoExecute. PreferNoSchedule as used for nodes is not valid here. More effects may get added in the future. Consumers must treat unknown effects like None.", "timeAdded": "TimeAdded represents the time at which the taint was added. Added automatically during create or update if not set.", } @@ -498,7 +498,7 @@ var map_ResourceSliceSpec = map[string]string{ "nodeName": "NodeName identifies the node which provides the resources in this pool. A field selector can be used to list only ResourceSlice objects belonging to a certain node.\n\nThis field can be used to limit access from nodes to ResourceSlices with the same node name. It also indicates to autoscalers that adding new nodes of the same type as some old node might also make new resources available.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set. 
This field is immutable.", "nodeSelector": "NodeSelector defines which nodes have access to the resources in the pool, when that pool is not limited to a single node.\n\nMust use exactly one term.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set.", "allNodes": "AllNodes indicates that all nodes have access to the resources in the pool.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set.", - "devices": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries.", + "devices": "Devices lists some or all of the devices in this pool.\n\nMust not have more than 128 entries. If any device uses taints the limit is 64.", "perDeviceNodeSelection": "PerDeviceNodeSelection defines whether the access from nodes to resources in the pool is set on the ResourceSlice level or on each device. If it is set to true, every device defined the ResourceSlice must specify this individually.\n\nExactly one of NodeName, NodeSelector, AllNodes, and PerDeviceNodeSelection must be set.", "sharedCounters": "SharedCounters defines a list of counter sets, each of which has a name and a list of counters available.\n\nThe names of the SharedCounters must be unique in the ResourceSlice.\n\nThe maximum number of counters in all sets is 32.", } diff --git a/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.json b/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.json index 84ad1495680..c574f84cb2e 100644 --- a/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.json +++ b/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.json @@ -45,17 +45,9 @@ }, "spec": { "deviceSelector": { - "deviceClassName": "deviceClassNameValue", "driver": "driverValue", "pool": "poolValue", - "device": "deviceValue", - "selectors": [ - { - "cel": { - "expression": "expressionValue" - } - } - ] + 
"device": "deviceValue" }, "taint": { "key": "keyValue", @@ -63,5 +55,17 @@ "effect": "effectValue", "timeAdded": "2004-01-01T01:01:01Z" } + }, + "status": { + "conditions": [ + { + "type": "typeValue", + "status": "statusValue", + "observedGeneration": 3, + "lastTransitionTime": "2004-01-01T01:01:01Z", + "reason": "reasonValue", + "message": "messageValue" + } + ] } } \ No newline at end of file diff --git a/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.pb b/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.pb index 60244465bfcc245cd915a3eb004db19e5d4f7ae5..feaa316ef8d4237d135f6649cd72fd912ae14b79 100644 GIT binary patch delta 95 zcmbQwvW;be5#zs&#;J_VVO*+{H!-SCe#2o0QWN$7ytkO diff --git a/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.yaml b/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.yaml index 8d06614b992..8c2bc6cfd3d 100644 --- a/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.yaml +++ b/staging/src/k8s.io/api/testdata/HEAD/resource.k8s.io.v1alpha3.DeviceTaintRule.yaml @@ -35,14 +35,18 @@ metadata: spec: deviceSelector: device: deviceValue - deviceClassName: deviceClassNameValue driver: driverValue pool: poolValue - selectors: - - cel: - expression: expressionValue taint: effect: effectValue key: keyValue timeAdded: "2004-01-01T01:01:01Z" value: valueValue +status: + conditions: + - lastTransitionTime: "2004-01-01T01:01:01Z" + message: messageValue + observedGeneration: 3 + reason: reasonValue + status: statusValue + type: typeValue diff --git a/staging/src/k8s.io/api/testdata/v1.33.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.json b/staging/src/k8s.io/api/testdata/v1.33.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.json new file mode 100644 index 00000000000..55fd38b03b6 --- /dev/null +++ 
b/staging/src/k8s.io/api/testdata/v1.33.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.json @@ -0,0 +1,60 @@ +{ + "kind": "DeviceTaintRule", + "apiVersion": "resource.k8s.io/v1alpha3", + "metadata": { + "name": "nameValue", + "generateName": "generateNameValue", + "namespace": "namespaceValue", + "selfLink": "selfLinkValue", + "uid": "uidValue", + "resourceVersion": "resourceVersionValue", + "generation": 7, + "creationTimestamp": "2008-01-01T01:01:01Z", + "deletionTimestamp": "2009-01-01T01:01:01Z", + "deletionGracePeriodSeconds": 10, + "labels": { + "labelsKey": "labelsValue" + }, + "annotations": { + "annotationsKey": "annotationsValue" + }, + "ownerReferences": [ + { + "apiVersion": "apiVersionValue", + "kind": "kindValue", + "name": "nameValue", + "uid": "uidValue", + "controller": true, + "blockOwnerDeletion": true + } + ], + "finalizers": [ + "finalizersValue" + ], + "managedFields": [ + { + "manager": "managerValue", + "operation": "operationValue", + "apiVersion": "apiVersionValue", + "time": "2004-01-01T01:01:01Z", + "fieldsType": "fieldsTypeValue", + "fieldsV1": {}, + "subresource": "subresourceValue" + } + ] + }, + "spec": { + "deviceSelector": { + "driver": "driverValue", + "pool": "poolValue", + "device": "deviceValue" + }, + "taint": { + "key": "keyValue", + "value": "valueValue", + "effect": "effectValue", + "timeAdded": "2004-01-01T01:01:01Z" + } + }, + "status": {} +} \ No newline at end of file diff --git a/staging/src/k8s.io/api/testdata/v1.33.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.pb b/staging/src/k8s.io/api/testdata/v1.33.0/resource.k8s.io.v1alpha3.DeviceTaintRule.after_roundtrip.pb new file mode 100644 index 0000000000000000000000000000000000000000..ff8debd3e4d203528c89d81acee25646ff494f4b GIT binary patch literal 501 zcmZ8e%SyvQ6ipvsGSM~$7iE?KHx=s(A-F8AR79k7;ck-L)G^bUFp~tT_yhihYd=B! 
z0ipjOE?oNunojHkcjvy&IrrYgR|eWa9SKV0GK5~@s~#15#Q|aIEje=Ar%=!k288nL zI%nX%HPA;LS)7bvNLUW;<_I`QlELLXgd_j4lC%R@%BaIHX=DI_W ziVCi?{#xHOr_W!nO~*JZ(aZOHiLTI12U&~+z|=WRT)TpLBJfa)a4s^El_a|Cti-zK z?ig7K<&`jA`u7%Yx^0ruRU`W~zb4x;<%H4u(xB#fss(6$OgI^V)CspG(%JrV`}jYN zSMKmR{b@MHVMzuoQiDlaUFM{T>JN{7N9Dod68fk^wC&oFq{W{Iwo)OO*7NL0zoZi1 bMP>q%3Zr5+t1-4Ajv>r+urz=TYd_j4lC%R@%BaIHX=DI_W ziVCi?{#xHOr_W!nO~*JZ(aZOHiLTI12U&~+z|=WRT)TpLBJfa)a4s^El_a|Cti-zK z?ig7K<&`jA`u7%Yx^0ruRU`W~zb4x;<%H4u(xB#fss(6$OgI^V)CspG(%JrV`}jYN zSMKmR{b@MHVMzuoQiDlaUFM{T>JN{7N9Dod68fk^wC&oFq{W{Iwo)OO*7NL0zoZi1 bMP>q%3Zr5+t1-4Ajv>r+urz=TY Date: Fri, 19 Sep 2025 15:59:29 +0200 Subject: [PATCH 05/11] DRA: implementation of none taint effect While at it, ensure that future unknown effects are treating like the None effect. --- .../device_taint_eviction_test.go | 40 +++++++++++++++++++ .../resourceclaim/devicetoleration.go | 3 ++ .../allocatortesting/allocator_testing.go | 20 ++++++++++ .../experimental/allocator_experimental.go | 14 ++++--- .../incubating/allocator_incubating.go | 14 ++++--- 5 files changed, 81 insertions(+), 10 deletions(-) diff --git a/pkg/controller/devicetainteviction/device_taint_eviction_test.go b/pkg/controller/devicetainteviction/device_taint_eviction_test.go index 32056fec003..7d50a15696f 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction_test.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction_test.go @@ -249,6 +249,24 @@ var ( slice.Spec.Devices[len(slice.Spec.Devices)-1].Name += "-other" return slice }() + sliceTaintedNone = func() *resourceapi.ResourceSlice { + slice := sliceTainted.DeepCopy() + for i := range slice.Spec.Devices { + for j := range slice.Spec.Devices[i].Taints { + slice.Spec.Devices[i].Taints[j].Effect = resourceapi.DeviceTaintEffectNone + } + } + return slice + }() + sliceTaintedUnknown = func() *resourceapi.ResourceSlice { + slice := sliceTainted.DeepCopy() + for i := range slice.Spec.Devices { + for j := range 
slice.Spec.Devices[i].Taints { + slice.Spec.Devices[i].Taints[j].Effect = resourceapi.DeviceTaintEffect("unknown-effect") + } + } + return slice + }() sliceTaintedNoSchedule = func() *resourceapi.ResourceSlice { slice := sliceTainted.DeepCopy() for i := range slice.Spec.Devices { @@ -960,6 +978,28 @@ func TestHandlers(t *testing.T) { }, wantEvents: []*v1.Event{cancelPodEviction}, }, + "ignore-none-effect": { + initialState: state{ + pods: []*v1.Pod{podWithClaimTemplateInStatus}, + slices: []*resourceapi.ResourceSlice{sliceTaintedNone, slice2}, + allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim}}, + }, + finalState: state{ + slices: []*resourceapi.ResourceSlice{sliceTaintedNone, slice2}, + allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim}}, + }, + }, + "ignore-unknown-effect": { + initialState: state{ + pods: []*v1.Pod{podWithClaimTemplateInStatus}, + slices: []*resourceapi.ResourceSlice{sliceTaintedUnknown, slice2}, + allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim}}, + }, + finalState: state{ + slices: []*resourceapi.ResourceSlice{sliceTaintedUnknown, slice2}, + allocatedClaims: []allocatedClaim{{ResourceClaim: inUseClaim}}, + }, + }, "eviction-change-taint": { initialState: state{ pods: []*v1.Pod{podWithClaimTemplateInStatus}, diff --git a/staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/devicetoleration.go b/staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/devicetoleration.go index 3158b581b9a..0a09835dd56 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/devicetoleration.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/resourceclaim/devicetoleration.go @@ -29,6 +29,9 @@ import ( // 3. Empty toleration.key means to match all taint keys. // If toleration.key is empty, toleration.operator must be 'Exists'; // this combination means to match all taint values and all taint keys. +// +// Callers must check separately what the effect is and only react to +// known effects. 
Unknown effects and the None effect must be ignored. func ToleratesTaint(toleration resourceapi.DeviceToleration, taint resourceapi.DeviceTaint) bool { // This code was copied from https://github.com/kubernetes/kubernetes/blob/f007012f5fe49e40ae0596cf463a8e7b247b3357/staging/src/k8s.io/api/core/v1/toleration.go#L39-L56. // It wasn't placed in the resourceapi package because code related to logic diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/allocatortesting/allocator_testing.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/allocatortesting/allocator_testing.go index 6acaf27710b..02647a0296e 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/allocatortesting/allocator_testing.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/allocatortesting/allocator_testing.go @@ -867,6 +867,11 @@ func TestAllocator(t *testing.T, taintKey := "taint-key" taintValue := "taint-value" taintValue2 := "taint-value-2" + taintNone := resourceapi.DeviceTaint{ + Key: taintKey, + Value: taintValue, + Effect: resourceapi.DeviceTaintEffectNone, + } taintNoSchedule := resourceapi.DeviceTaint{ Key: taintKey, Value: taintValue, @@ -3524,6 +3529,21 @@ func TestAllocator(t *testing.T, deviceAllocationResult(req0SubReq1, driverA, pool1, device2, false, tolerationNoExecute), // Only second device's taints are tolerated. 
)}, }, + "tainted-no-effect": { + features: Features{ + DeviceTaints: true, + }, + claimsToAllocate: objects(claimWithRequest(claim0, req0, classA)), + classes: objects(class(classA, driverA)), + slices: unwrap(slice(slice1, node1, pool1, driverA, + device(device1, nil, nil).withTaints(taintNone), + )), + node: node(node1, region1), + expectResults: []any{allocationResult( + localNodeSelector(node1), + deviceAllocationResult(req0, driverA, pool1, device1, false), + )}, + }, "tainted-one-device-two-taints-both-tolerated": { features: Features{ DeviceTaints: true, diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/experimental/allocator_experimental.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/experimental/allocator_experimental.go index 03df2f02cee..db24a07cd32 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/experimental/allocator_experimental.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/experimental/allocator_experimental.go @@ -1265,7 +1265,7 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus // Might be tainted, in which case the taint has to be tolerated. // The check is skipped if the feature is disabled. - if alloc.features.DeviceTaints && !allTaintsTolerated(device.Device, request) { + if alloc.features.DeviceTaints && taintPreventsAllocation(device.Device, request) { return false, nil, nil } @@ -1361,13 +1361,17 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus }, nil } -func allTaintsTolerated(device *draapi.Device, request requestAccessor) bool { +func taintPreventsAllocation(device *draapi.Device, request requestAccessor) bool { for _, taint := range device.Taints { - if !taintTolerated(taint, request) { - return false + switch taint.Effect { + // Only known effects prevent allocation, others (including None) are ignored. 
+ case resourceapi.DeviceTaintEffectNoExecute, resourceapi.DeviceTaintEffectNoSchedule: + if !taintTolerated(taint, request) { + return true + } } } - return true + return false } func taintTolerated(taint resourceapi.DeviceTaint, request requestAccessor) bool { diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/incubating/allocator_incubating.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/incubating/allocator_incubating.go index 0677a0cea29..a52c5eb8787 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/incubating/allocator_incubating.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/incubating/allocator_incubating.go @@ -1108,7 +1108,7 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus // Might be tainted, in which case the taint has to be tolerated. // The check is skipped if the feature is disabled. - if alloc.features.DeviceTaints && !allTaintsTolerated(device.Device, request) { + if alloc.features.DeviceTaints && taintPreventsAllocation(device.Device, request) { return false, nil, nil } @@ -1166,13 +1166,17 @@ func (alloc *allocator) allocateDevice(r deviceIndices, device deviceWithID, mus }, nil } -func allTaintsTolerated(device *draapi.Device, request requestAccessor) bool { +func taintPreventsAllocation(device *draapi.Device, request requestAccessor) bool { for _, taint := range device.Taints { - if !taintTolerated(taint, request) { - return false + switch taint.Effect { + // Only known effects prevent allocation, others (including None) are ignored. 
+ case resourceapi.DeviceTaintEffectNoExecute, resourceapi.DeviceTaintEffectNoSchedule: + if !taintTolerated(taint, request) { + return true + } } } - return true + return false } func taintTolerated(taint resourceapi.DeviceTaint, request requestAccessor) bool { From e5fcd20a26452a465f22967679f9068d13d5f9ff Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 23 Oct 2025 09:06:43 +0200 Subject: [PATCH 06/11] DRA device taints: tighten controller test We know how often the controller should get a pod, let's check it. Must run before we do our own GET call. --- .../device_taint_eviction_test.go | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pkg/controller/devicetainteviction/device_taint_eviction_test.go b/pkg/controller/devicetainteviction/device_taint_eviction_test.go index 7d50a15696f..9795925ad40 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction_test.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction_test.go @@ -1472,20 +1472,27 @@ func TestEviction(t *testing.T) { fakeClientset := fake.NewSimpleClientset(tt.initialObjects...) 
tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) + var podGets int var podUpdates int var updatedPod *v1.Pod var podDeletions int + fakeClientset.PrependReactor("get", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { + podGets++ + podName := action.(core.GetAction).GetName() + assert.Equal(t, podWithClaimName.Name, podName, "name of pod to patch") + return false, nil, nil + }) fakeClientset.PrependReactor("patch", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { podUpdates++ podName := action.(core.PatchAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of patched pod") + assert.Equal(t, podWithClaimName.Name, podName, "name of pod to get") return false, nil, nil }) fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { podDeletions++ podName := action.(core.DeleteAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of deleted pod") + assert.Equal(t, podWithClaimName.Name, podName, "name of pod to delete") obj, err := fakeClientset.Tracker().Get(v1.SchemeGroupVersion.WithResource("pods"), pod.Namespace, pod.Name) require.NoError(tCtx, err) updatedPod = obj.(*v1.Pod) @@ -1525,9 +1532,14 @@ func TestEviction(t *testing.T) { } } - // Eventually the pod gets deleted (= evicted). // We can wait for the controller to be idle. tCtx.Wait() + + // The number of API calls is deterministic. 
+ assert.Equal(tCtx, 1, podGets, "get pod once") + assert.Equal(tCtx, 1, podUpdates, "update pod once") + assert.Equal(tCtx, 1, podDeletions, "delete pod once") + _, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) switch { case err == nil: From c69259cb7134d4e4ce7521c052ce5ff1729e2eca Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Fri, 24 Oct 2025 10:09:24 +0200 Subject: [PATCH 07/11] DRA device taints: switch to workqueue in controller The approach copied from node taint eviction was to fire off one goroutine per pod at the intended time. This leads to the "thundering herd" problem: when a single taint causes eviction of several pods and those all have no or the same toleration grace period, then they all get deleted concurrently at the same time. For node taint eviction that is limited by the number of pods per node, which is typically ~100. In an integration test, that already led to problems with watchers: cacher.go:855] cacher (pods): 100 objects queued in incoming channel. cache_watcher.go:203] Forcing pods watcher close due to unresponsiveness: key: "/pods/", labels: "", fields: "". len(c.input) = 10, len(c.result) = 10, graceful = false It also causes spikes in memory consumption (mostly the 2KB stack per goroutine plus closure) with no upper limit. Using a workqueue makes concurrency more deterministic because there is an upper limit. In the integration test, 10 workers kept the watch active. Another advantage is that failures to evict the pod get retried with exponential backoff per affected pod forever. Previously, evicting was tried a few times with a fixed rate and then the controller gave up. If the apiserver was down long enough, pods didn't get evicted. 
--- .../device_taint_eviction.go | 266 +++++++++++++----- .../device_taint_eviction_test.go | 120 +++----- 2 files changed, 242 insertions(+), 144 deletions(-) diff --git a/pkg/controller/devicetainteviction/device_taint_eviction.go b/pkg/controller/devicetainteviction/device_taint_eviction.go index 6c9b201773e..bff689e108e 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction.go @@ -48,6 +48,7 @@ import ( resourcealphalisters "k8s.io/client-go/listers/resource/v1alpha3" "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/record" + "k8s.io/client-go/util/workqueue" "k8s.io/dynamic-resource-allocation/resourceclaim" "k8s.io/klog/v2" apipod "k8s.io/kubernetes/pkg/api/v1/pod" @@ -57,9 +58,12 @@ import ( ) const ( - // retries is the number of times that the controller tries to delete a pod - // that needs to be evicted. - retries = 5 + // This is a compromise between getting work done and not overwhelming the apiserver + // and pod informers. Integration testing with 100 workers modified pods so quickly + // that a watch in the integration test couldn't keep up: + // cacher.go:855] cacher (pods): 100 objects queued in incoming channel. + // cache_watcher.go:203] Forcing pods watcher close due to unresponsiveness: key: "/pods/", labels: "", fields: "". len(c.input) = 10, len(c.result) = 10, graceful = false + numWorkers = 10 ) // Controller listens to Taint changes of DRA devices and Toleration changes of ResourceClaims, @@ -94,7 +98,9 @@ type Controller struct { classInformer resourceinformers.DeviceClassInformer ruleLister resourcealphalisters.DeviceTaintRuleLister haveSynced []cache.InformerSynced + hasSynced atomic.Int32 metrics metrics.Metrics + workqueue workqueue.TypedRateLimitingInterface[workItem] // evictPod ensures that the pod gets evicted at the specified time. // It doesn't block. 
@@ -104,13 +110,24 @@ type Controller struct { // Idempotent, returns false if there was nothing to cancel. cancelEvict func(pod tainteviction.NamespacedObject) bool + // mutex protects the following shared data structures. + mutex sync.Mutex + + // deletePodAt maps a pod to the time when it is meant to be evicted. + // + // The entry for pod gets deleted when eviction is no longer necessary + // and updated when the time changes. + deletePodAt map[tainteviction.NamespacedObject]time.Time + + // maybeDeletePodCount counts how often a worker checked a pod. + // This is useful for unit testing, but probably not a good public metric. + maybeDeletePodCount int64 + // allocatedClaims holds all currently known allocated claims. allocatedClaims map[types.NamespacedName]allocatedClaim // A value is slightly more efficient in BenchmarkTaintUntaint (less allocations!). // pools indexes all slices by driver and pool name. pools map[poolID]pool - - hasSynced atomic.Int32 } type poolID struct { @@ -218,35 +235,77 @@ type allocatedClaim struct { evictionTime *metav1.Time } -func (tc *Controller) deletePodHandler(c clientset.Interface, emitEventFunc func(tainteviction.NamespacedObject)) func(ctx context.Context, fireAt time.Time, args *tainteviction.WorkArgs) error { - return func(ctx context.Context, fireAt time.Time, args *tainteviction.WorkArgs) error { - var err error - for i := 0; i < retries; i++ { - if i > 0 { - time.Sleep(10 * time.Millisecond) - } - err = addConditionAndDeletePod(ctx, c, args.Object, &emitEventFunc) - if apierrors.IsNotFound(err) { - // Not a problem, the work is done. - // But we didn't do it, so don't - // bump the metric. - return nil - } - if err == nil { - podDeletionLatency := time.Since(fireAt) - // TODO: include more information why it was evicted. 
- klog.FromContext(ctx).Info("Evicted pod by deleting it", "pod", args.Object, "latency", podDeletionLatency) - tc.metrics.PodDeletionsTotal.Inc() - tc.metrics.PodDeletionsLatency.Observe(float64(podDeletionLatency.Seconds())) - return nil - } - } - return err - } +// workItem is stored in a workqueue and describes some piece of work which +// needs to be done. +// +// Right now that work is deleting pods. +// Updating DeviceTaintRule status will be added later. +type workItem struct { + // podRef references a pod which may need to be deleted. + // + // Controller.deletePodAt is the source of truth for if and when the pod really needs to be removed. + podRef tainteviction.NamespacedObject } -func addConditionAndDeletePod(ctx context.Context, c clientset.Interface, podRef tainteviction.NamespacedObject, emitEventFunc *func(tainteviction.NamespacedObject)) (err error) { - pod, err := c.CoreV1().Pods(podRef.Namespace).Get(ctx, podRef.Name, metav1.GetOptions{}) +// maybeDeletePod checks whether the pod needs to be deleted now and if so, does it. +// Three results are possible: +// - an error if anything goes wrong and the operation needs to be repeated +// - a positive delay if the operation needs to be repeated in the future +// - a zero delay if the deletion is done or no longer necessary +func (tc *Controller) maybeDeletePod(ctx context.Context, podRef tainteviction.NamespacedObject) (againAfter time.Duration, finalErr error) { + logger := klog.FromContext(ctx) + + // We must not hold this mutex while doing blocking API calls. + // TODO: try an atomic map instead. 
+ tc.mutex.Lock() + tc.maybeDeletePodCount++ + fireAt, ok := tc.deletePodAt[podRef] + tc.mutex.Unlock() + logger.V(5).Info("Processing pod deletion work item", "pod", podRef, "active", ok, "fireAt", fireAt) + + if !ok { + logger.V(5).Info("Work item for pod deletion obsolete, nothing to do", "pod", podRef) + return 0, nil + } + + now := time.Now() + againAfter = fireAt.Sub(now) + if againAfter > 0 { + // Not yet. Maybe the fireAt time got updated. + return againAfter, nil + } + + defer func() { + if finalErr == nil { + // Forget the deletion time, we are done. + tc.mutex.Lock() + delete(tc.deletePodAt, podRef) + tc.mutex.Unlock() + } + }() + + err := tc.addConditionAndDeletePod(ctx, podRef) + if apierrors.IsNotFound(err) { + // Not a problem, the work is done. + // But we didn't do it, so don't + // bump the metric. + return 0, nil + } + if err != nil { + return 0, err + } + + podDeletionLatency := time.Since(fireAt) + // TODO: include more information why it was evicted. + klog.FromContext(ctx).Info("Evicted pod by deleting it", "pod", podRef, "latency", podDeletionLatency) + tc.metrics.PodDeletionsTotal.Inc() + tc.metrics.PodDeletionsLatency.Observe(float64(podDeletionLatency.Seconds())) + + return 0, nil +} + +func (tc *Controller) addConditionAndDeletePod(ctx context.Context, podRef tainteviction.NamespacedObject) (err error) { + pod, err := tc.client.CoreV1().Pods(podRef.Namespace).Get(ctx, podRef.Name, metav1.GetOptions{}) if err != nil { return err } @@ -258,12 +317,14 @@ func addConditionAndDeletePod(ctx context.Context, c clientset.Interface, podRef return apierrors.NewNotFound(v1.SchemeGroupVersion.WithResource("pods").GroupResource(), pod.Name) } - // Emit the event only once, and only if we are actually doing something. - if *emitEventFunc != nil { - (*emitEventFunc)(podRef) - *emitEventFunc = nil + if pod.DeletionTimestamp != nil { + // Already deleted, no need to evict. + return nil } + // Emit the event only if we are actually doing something. 
+ tc.emitPodDeletionEvent(podRef) + newStatus := pod.Status.DeepCopy() updated := apipod.UpdatePodCondition(newStatus, &v1.PodCondition{ Type: v1.DisruptionTarget, @@ -272,7 +333,7 @@ func addConditionAndDeletePod(ctx context.Context, c clientset.Interface, podRef Message: "Device Taint manager: deleting due to NoExecute taint", }) if updated { - if _, _, _, err := utilpod.PatchPodStatus(ctx, c, pod.Namespace, pod.Name, pod.UID, pod.Status, *newStatus); err != nil { + if _, _, _, err := utilpod.PatchPodStatus(ctx, tc.client, pod.Namespace, pod.Name, pod.UID, pod.Status, *newStatus); err != nil { return err } } @@ -280,7 +341,7 @@ func addConditionAndDeletePod(ctx context.Context, c clientset.Interface, podRef // another pod using the same name in the meantime. Include a precondition // to prevent that race. This delete attempt then fails and the next one detects // the new pod and stops retrying. - return c.CoreV1().Pods(podRef.Namespace).Delete(ctx, podRef.Name, metav1.DeleteOptions{ + return tc.client.CoreV1().Pods(podRef.Namespace).Delete(ctx, podRef.Name, metav1.DeleteOptions{ Preconditions: &metav1.Preconditions{ UID: &podRef.UID, }, @@ -303,6 +364,7 @@ func New(c clientset.Interface, podInformer coreinformers.PodInformer, claimInfo taintInformer: taintInformer, classInformer: classInformer, ruleLister: taintInformer.Lister(), + deletePodAt: make(map[tainteviction.NamespacedObject]time.Time), allocatedClaims: make(map[types.NamespacedName]allocatedClaim), pools: make(map[poolID]pool), // Instantiate all informers now to ensure that they get started. 
@@ -325,9 +387,12 @@ func (tc *Controller) Run(ctx context.Context) error { defer utilruntime.HandleCrash() logger := klog.FromContext(ctx) logger.Info("Starting", "controller", tc.name) - defer logger.Info("Shutting down controller", "controller", tc.name) + defer logger.Info("Shut down controller", "controller", tc.name, "reason", context.Cause(ctx)) tc.logger = logger + var wg sync.WaitGroup + defer wg.Wait() + // Doing debug logging? if loggerV := logger.V(6); loggerV.Enabled() { tc.eventLogger = &loggerV @@ -341,21 +406,47 @@ func (tc *Controller) Run(ctx context.Context) error { tc.recorder = eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: tc.name}).WithLogger(logger) defer eventBroadcaster.Shutdown() - taintEvictionQueue := tainteviction.CreateWorkerQueue(tc.deletePodHandler(tc.client, tc.emitPodDeletionEvent)) + queueLogger := klog.LoggerWithName(logger, "workqueue") + delayingQueue := workqueue.NewTypedDelayingQueueWithConfig(workqueue.TypedDelayingQueueConfig[workItem]{ + Logger: &queueLogger, + Name: tc.name, + }) + tc.workqueue = workqueue.NewTypedRateLimitingQueueWithConfig( + workqueue.DefaultTypedControllerRateLimiter[workItem](), + workqueue.TypedRateLimitingQueueConfig[workItem]{ + Name: tc.name, + DelayingQueue: delayingQueue, + }, + ) + defer func() { + logger.V(3).Info("Shutting down work queue") + tc.workqueue.ShutDown() + }() + evictPod := tc.evictPod tc.evictPod = func(podRef tainteviction.NamespacedObject, fireAt time.Time) { + tc.deletePodAt[podRef] = fireAt + now := time.Now() + tc.workqueue.AddAfter(workItem{podRef: podRef}, fireAt.Sub(now)) + // Only relevant for testing. if evictPod != nil { evictPod(podRef, fireAt) } - taintEvictionQueue.UpdateWork(ctx, &tainteviction.WorkArgs{Object: podRef}, time.Now(), fireAt) } cancelEvict := tc.cancelEvict tc.cancelEvict = func(podRef tainteviction.NamespacedObject) bool { + _, ok := tc.deletePodAt[podRef] + if !ok { + // Nothing to cancel. 
+ return false + } + delete(tc.deletePodAt, podRef) if cancelEvict != nil { cancelEvict(podRef) } - return taintEvictionQueue.CancelWork(logger, podRef.NamespacedName.String()) + // Cannot remove from a work queue. The worker will detect that the entry is obsolete by checking deletePodAt. + return true } // Start events processing pipeline. @@ -369,9 +460,6 @@ func (tc *Controller) Run(ctx context.Context) error { } defer eventBroadcaster.Shutdown() - // mutex serializes event processing. - var mutex sync.Mutex - claimHandler, err := tc.claimInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ AddFunc: func(obj any) { claim, ok := obj.(*resourceapi.ResourceClaim) @@ -379,8 +467,8 @@ func (tc *Controller) Run(ctx context.Context) error { logger.Error(nil, "Expected ResourceClaim", "actual", fmt.Sprintf("%T", obj)) return } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handleClaimChange(nil, claim) }, UpdateFunc: func(oldObj, newObj any) { @@ -393,8 +481,8 @@ func (tc *Controller) Run(ctx context.Context) error { if !ok { logger.Error(nil, "Expected ResourceClaim", "actual", fmt.Sprintf("%T", newObj)) } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handleClaimChange(oldClaim, newClaim) }, DeleteFunc: func(obj any) { @@ -406,8 +494,8 @@ func (tc *Controller) Run(ctx context.Context) error { logger.Error(nil, "Expected ResourceClaim", "actual", fmt.Sprintf("%T", obj)) return } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handleClaimChange(claim, nil) }, }) @@ -426,8 +514,8 @@ func (tc *Controller) Run(ctx context.Context) error { logger.Error(nil, "Expected Pod", "actual", fmt.Sprintf("%T", obj)) return } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handlePodChange(nil, pod) }, UpdateFunc: func(oldObj, newObj any) { @@ -440,8 +528,8 @@ func (tc *Controller) Run(ctx context.Context) error { if !ok { 
logger.Error(nil, "Expected Pod", "actual", fmt.Sprintf("%T", newObj)) } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handlePodChange(oldPod, newPod) }, DeleteFunc: func(obj any) { @@ -453,8 +541,8 @@ func (tc *Controller) Run(ctx context.Context) error { logger.Error(nil, "Expected Pod", "actual", fmt.Sprintf("%T", obj)) return } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handlePodChange(pod, nil) }, }) @@ -473,8 +561,8 @@ func (tc *Controller) Run(ctx context.Context) error { logger.Error(nil, "Expected DeviceTaintRule", "actual", fmt.Sprintf("%T", obj)) return } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handleRuleChange(nil, rule) }, UpdateFunc: func(oldObj, newObj any) { @@ -487,8 +575,8 @@ func (tc *Controller) Run(ctx context.Context) error { if !ok { logger.Error(nil, "Expected DeviceTaintRule", "actual", fmt.Sprintf("%T", newObj)) } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handleRuleChange(oldRule, newRule) }, DeleteFunc: func(obj any) { @@ -500,8 +588,8 @@ func (tc *Controller) Run(ctx context.Context) error { logger.Error(nil, "Expected DeviceTaintRule", "actual", fmt.Sprintf("%T", obj)) return } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handleRuleChange(rule, nil) }, }) @@ -520,8 +608,8 @@ func (tc *Controller) Run(ctx context.Context) error { logger.Error(nil, "Expected ResourceSlice", "actual", fmt.Sprintf("%T", obj)) return } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handleSliceChange(nil, slice) }, UpdateFunc: func(oldObj, newObj any) { @@ -534,8 +622,8 @@ func (tc *Controller) Run(ctx context.Context) error { if !ok { logger.Error(nil, "Expected ResourceSlice", "actual", fmt.Sprintf("%T", newObj)) } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() 
tc.handleSliceChange(oldSlice, newSlice) }, DeleteFunc: func(obj any) { @@ -545,8 +633,8 @@ func (tc *Controller) Run(ctx context.Context) error { logger.Error(nil, "Expected ResourceSlice", "actual", fmt.Sprintf("%T", obj)) return } - mutex.Lock() - defer mutex.Unlock() + tc.mutex.Lock() + defer tc.mutex.Unlock() tc.handleSliceChange(slice, nil) }, }) @@ -565,10 +653,50 @@ func (tc *Controller) Run(ctx context.Context) error { logger.V(1).Info("Underlying informers have synced") tc.hasSynced.Store(1) + for i := range numWorkers { + wg.Go(func() { + tc.worker(klog.NewContext(ctx, klog.LoggerWithName(queueLogger, fmt.Sprintf("worker-%d", i)))) + }) + } + <-ctx.Done() return nil } +// worker blocks until the workqueue is shut down. +// Cancellation of the context only aborts on-going work. +func (tc *Controller) worker(ctx context.Context) { + logger := klog.FromContext(ctx) + defer utilruntime.HandleCrashWithLogger(logger) + + for { + item, shutdown := tc.workqueue.Get() + if shutdown { + return + } + + func() { + defer tc.workqueue.Done(item) + + var err error + var againAfter time.Duration + if item.podRef.Name != "" { + againAfter, err = tc.maybeDeletePod(ctx, item.podRef) + } + switch { + case err != nil: + logger.V(3).Info("Evicting pod failed, will retry", "err", err) + tc.workqueue.AddRateLimited(item) + case againAfter > 0: + logger.V(5).Info("Checking pod eviction again later", "delay", againAfter) + tc.workqueue.AddAfter(item, againAfter) + default: + tc.workqueue.Forget(item) + } + }() + } +} + func (tc *Controller) handleClaimChange(oldClaim, newClaim *resourceapi.ResourceClaim) { claim := newClaim if claim == nil { diff --git a/pkg/controller/devicetainteviction/device_taint_eviction_test.go b/pkg/controller/devicetainteviction/device_taint_eviction_test.go index 9795925ad40..fdf616d0d75 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction_test.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction_test.go @@ -1593,22 
+1593,45 @@ func testCancelEviction(tCtx ktesting.TContext, deletePod bool) { slice := sliceTainted.DeepCopy() slice.Spec.Devices[0].Taints[0].TimeAdded = &metav1.Time{Time: time.Now()} claim := inUseClaim.DeepCopy() + tolerationSeconds := int64(60) claim.Status.Allocation.Devices.Results[0].Tolerations = []resourceapi.DeviceToleration{{ Operator: resourceapi.DeviceTolerationOpExists, Effect: resourceapi.DeviceTaintEffectNoExecute, - TolerationSeconds: ptr.To(int64(60)), + TolerationSeconds: &tolerationSeconds, }} fakeClientset := fake.NewSimpleClientset( slice, claim, pod, ) - tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) - pod, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) require.NoError(tCtx, err, "get pod before eviction") assert.Equal(tCtx, podWithClaimName, pod, "test pod") + var podGets int + var podUpdates int + var podDeletions int + + fakeClientset.PrependReactor("get", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { + podGets++ + podName := action.(core.GetAction).GetName() + assert.Equal(tCtx, podWithClaimName.Name, podName, "name of pod to patch") + return false, nil, nil + }) + fakeClientset.PrependReactor("patch", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { + podUpdates++ + podName := action.(core.PatchAction).GetName() + assert.Equal(tCtx, podWithClaimName.Name, podName, "name of pod to get") + return false, nil, nil + }) + fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { + podDeletions++ + podName := action.(core.DeleteAction).GetName() + assert.Equal(tCtx, podWithClaimName.Name, podName, "name of pod to delete") + return false, nil, nil + }) + + tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) controller := newTestController(tCtx, fakeClientset) var mutex sync.Mutex @@ -1666,10 +1689,29 @@ func testCancelEviction(tCtx 
ktesting.TContext, deletePod bool) { ktesting.Eventually(tCtx, listEvents).WithTimeout(30 * time.Second).Should(matchCancellationEvent()) } tCtx.Wait() + matchEvents := matchCancellationEvent() if deletePod { matchEvents = gomega.BeEmpty() + assert.Equal(tCtx, 1, podDeletions, "Pod should have been deleted exactly once by test.") + } else { + assert.Equal(tCtx, 0, podDeletions, "Pod should not have been deleted.") } + + // Naively (?) one could expect synctest.Wait to have blocked until the work item added via AddAfter + // got processed because before that the overall state isn't stable yet. But the workqueue package + // seems to implement AddAfter in a way which is not detected as "blocking on time to pass" + // by synctest and therefore it returns without advancing time enough. + // + // Here we trigger that manually as a workaround (?). The factor doesn't really matter. + // Commenting this out causes the controller.maybeDeletePodCount check to fail. + time.Sleep(10 * time.Duration(tolerationSeconds) * time.Second) + tCtx.Wait() + + assert.Equal(tCtx, 0, podGets, "Worker should not have needed to get the pod.") + assert.Equal(tCtx, 0, podUpdates, "Worker should not have needed to update the pod.") + assert.Equal(tCtx, 0, controller.workqueue.Len(), "Work queue should be empty now.") + assert.Equal(tCtx, int64(1), controller.maybeDeletePodCount, "Work queue should have processed pod.") gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchEvents) tCtx.ExpectNoError(testPodDeletionsMetrics(controller, 0)) } @@ -1822,78 +1864,6 @@ func TestRetry(t *testing.T) { }) } -// TestRetry covers the scenario that an eviction attempt fails. -func TestEvictionFailure(t *testing.T) { - tCtx := ktesting.Init(t) - - tCtx.SyncTest("", func(tCtx ktesting.TContext) { - // This scenario is the same as "evict-pod-resourceclaim" above. 
- pod := podWithClaimName.DeepCopy() - fakeClientset := fake.NewSimpleClientset( - sliceTainted, - slice2, - inUseClaim, - pod, - ) - tCtx = ktesting.WithClients(tCtx, nil, nil, fakeClientset, nil, nil) - - pod, err := fakeClientset.CoreV1().Pods(pod.Namespace).Get(tCtx, pod.Name, metav1.GetOptions{}) - require.NoError(tCtx, err, "get pod before eviction") - assert.Equal(tCtx, podWithClaimName, pod, "test pod") - - var mutex sync.Mutex - var podGets int - var podDeletions int - - fakeClientset.PrependReactor("get", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() - podGets++ - podName := action.(core.GetAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of patched pod") - return false, nil, nil - }) - fakeClientset.PrependReactor("delete", "pods", func(action core.Action) (handled bool, ret runtime.Object, err error) { - mutex.Lock() - defer mutex.Unlock() - podDeletions++ - podName := action.(core.DeleteAction).GetName() - assert.Equal(t, podWithClaimName.Name, podName, "name of deleted pod") - return true, nil, apierrors.NewInternalError(errors.New("fake error")) - }) - controller := newTestController(tCtx, fakeClientset) - - var wg sync.WaitGroup - defer func() { - t.Log("Waiting for goroutine termination...") - tCtx.Cancel("time to stop") - wg.Wait() - }() - wg.Add(1) - go func() { - defer wg.Done() - assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") - }() - - // Block until eviction has started. - // Eventually deletion is attempted a few times. - ktesting.Eventually(tCtx, func(tCtx ktesting.TContext) int { - mutex.Lock() - defer mutex.Unlock() - return podDeletions - }).WithTimeout(30*time.Second).Should(gomega.BeNumerically(">=", retries), "pod eviction failed") - - // Now we can check the API calls. - // The background goroutined must be done when Wait returns, - // otherwise Wait wouldn't return. 
- tCtx.Wait() - assert.Equal(tCtx, retries, podGets, "number of pod get calls") - assert.Equal(tCtx, retries, podDeletions, "number of pod delete calls") - gomega.NewWithT(tCtx).Expect(listEvents(tCtx)).Should(matchDeletionEvent()) - tCtx.ExpectNoError(testPodDeletionsMetrics(controller, 0)) - }) -} - // BenchTaintUntaint checks the full flow of detecting a claim as // tainted because of a new DeviceTaintRule, starting to evict its // consumer, and then undoing that when the DeviceTaintRule is removed. From f4a453389d25974b62ec2e45508b2150a9192b30 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Mon, 27 Oct 2025 08:40:44 +0100 Subject: [PATCH 08/11] DRA device taint eviction: configurable number of workers It might never be necessary to change the default, but it is hard to be sure. It's better to have the option, just in case. --- api/api-rules/violation_exceptions.list | 2 + cmd/kube-controller-manager/app/core.go | 2 +- .../options/devicetaintevictioncontroller.go | 63 +++++ .../devicetaintevictioncontroller_test.go | 219 ++++++++++++++++++ .../app/options/options.go | 9 + .../app/options/options_test.go | 19 ++ pkg/controller/apis/config/types.go | 15 +- .../apis/config/v1alpha1/defaults.go | 3 + .../devicetainteviction/config/doc.go | 19 ++ .../devicetainteviction/config/types.go | 26 +++ .../config/v1alpha1/conversion.go | 40 ++++ .../config/v1alpha1/defaults.go | 41 ++++ .../config/v1alpha1/defaults_test.go | 31 +++ .../config/v1alpha1/doc.go | 21 ++ .../config/v1alpha1/register.go | 31 +++ .../device_taint_eviction.go | 11 +- .../device_taint_eviction_test.go | 8 +- 17 files changed, 539 insertions(+), 21 deletions(-) create mode 100644 cmd/kube-controller-manager/app/options/devicetaintevictioncontroller.go create mode 100644 cmd/kube-controller-manager/app/options/devicetaintevictioncontroller_test.go create mode 100644 pkg/controller/devicetainteviction/config/doc.go create mode 100644 pkg/controller/devicetainteviction/config/types.go create mode 
100644 pkg/controller/devicetainteviction/config/v1alpha1/conversion.go create mode 100644 pkg/controller/devicetainteviction/config/v1alpha1/defaults.go create mode 100644 pkg/controller/devicetainteviction/config/v1alpha1/defaults_test.go create mode 100644 pkg/controller/devicetainteviction/config/v1alpha1/doc.go create mode 100644 pkg/controller/devicetainteviction/config/v1alpha1/register.go diff --git a/api/api-rules/violation_exceptions.list b/api/api-rules/violation_exceptions.list index 233f87be0d7..6d16fc9fda7 100644 --- a/api/api-rules/violation_exceptions.list +++ b/api/api-rules/violation_exceptions.list @@ -172,6 +172,7 @@ API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,C API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,CronJobControllerConfiguration,ConcurrentCronJobSyncs API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,DaemonSetControllerConfiguration,ConcurrentDaemonSetSyncs API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,DeploymentControllerConfiguration,ConcurrentDeploymentSyncs +API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,DeviceTaintEvictionControllerConfiguration,ConcurrentSyncs API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,EndpointControllerConfiguration,ConcurrentEndpointSyncs API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,EndpointControllerConfiguration,EndpointUpdatesBatchPeriod API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,EndpointSliceControllerConfiguration,ConcurrentServiceEndpointSyncs @@ -199,6 +200,7 @@ API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,K API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,DaemonSetController API rule violation: 
names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,DeploymentController API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,DeprecatedController +API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,DeviceTaintEvictionController API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,EndpointController API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,EndpointSliceController API rule violation: names_match,k8s.io/kube-controller-manager/config/v1alpha1,KubeControllerManagerConfiguration,EndpointSliceMirroringController diff --git a/cmd/kube-controller-manager/app/core.go b/cmd/kube-controller-manager/app/core.go index 676e8ea03ed..54a9df182fc 100644 --- a/cmd/kube-controller-manager/app/core.go +++ b/cmd/kube-controller-manager/app/core.go @@ -276,7 +276,7 @@ func newDeviceTaintEvictionController(ctx context.Context, controllerContext Con controllerName, ) return newControllerLoop(func(ctx context.Context) { - if err := deviceTaintEvictionController.Run(ctx); err != nil { + if err := deviceTaintEvictionController.Run(ctx, int(controllerContext.ComponentConfig.DeviceTaintEvictionController.ConcurrentSyncs)); err != nil { klog.FromContext(ctx).Error(err, "Device taint processing leading to Pod eviction failed and is now paused") } <-ctx.Done() diff --git a/cmd/kube-controller-manager/app/options/devicetaintevictioncontroller.go b/cmd/kube-controller-manager/app/options/devicetaintevictioncontroller.go new file mode 100644 index 00000000000..64bd263ed4d --- /dev/null +++ b/cmd/kube-controller-manager/app/options/devicetaintevictioncontroller.go @@ -0,0 +1,63 @@ +/* +Copyright The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package options + +import ( + "fmt" + + "github.com/spf13/pflag" + + devicetaintevictionconfig "k8s.io/kubernetes/pkg/controller/devicetainteviction/config" +) + +// DeviceTaintEvictionControllerOptions holds the DeviceTaintEvictionController options. +type DeviceTaintEvictionControllerOptions struct { + *devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration +} + +// AddFlags adds flags related to DeviceTaintEvictionController for controller manager to the specified FlagSet. +func (o *DeviceTaintEvictionControllerOptions) AddFlags(fs *pflag.FlagSet) { + if o == nil { + return + } + + fs.Int32Var(&o.ConcurrentSyncs, "concurrent-device-taint-eviction-syncs", o.ConcurrentSyncs, "The number of operations (evicting pods, updating DeviceTaintRule status) allowed to run concurrently. Greater number = more responsive, but more CPU (and network) load") +} + +// ApplyTo fills up DeviceTaintEvictionController config with options. +func (o *DeviceTaintEvictionControllerOptions) ApplyTo(cfg *devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration) error { + if o == nil { + return nil + } + + cfg.ConcurrentSyncs = o.ConcurrentSyncs + + return nil +} + +// Validate checks validation of DeviceTaintEvictionControllerOptions. 
+func (o *DeviceTaintEvictionControllerOptions) Validate() []error { + if o == nil { + return nil + } + + var errs []error + if o.ConcurrentSyncs <= 0 { + errs = append(errs, fmt.Errorf("concurrent-device-taint-eviction-syncs must be greater than zero, got %d", o.ConcurrentSyncs)) + } + return errs +} diff --git a/cmd/kube-controller-manager/app/options/devicetaintevictioncontroller_test.go b/cmd/kube-controller-manager/app/options/devicetaintevictioncontroller_test.go new file mode 100644 index 00000000000..a804ad7675c --- /dev/null +++ b/cmd/kube-controller-manager/app/options/devicetaintevictioncontroller_test.go @@ -0,0 +1,219 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package options + +import ( + "reflect" + "strings" + "testing" + + "github.com/spf13/pflag" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + + devicetaintevictionconfig "k8s.io/kubernetes/pkg/controller/devicetainteviction/config" +) + +func TestDeviceTaintEvictionControllerOptions_AddFlags(t *testing.T) { + fs := pflag.NewFlagSet("test", pflag.ContinueOnError) + opts := &DeviceTaintEvictionControllerOptions{ + &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{ + ConcurrentSyncs: 50, + }, + } + + opts.AddFlags(fs) + + // Test that the flag was added + flag := fs.Lookup("concurrent-device-taint-eviction-syncs") + if flag == nil { + t.Error("concurrent-device-taint-eviction-syncs flag was not added") + return + } + + // Test that the flag has the correct default value + if flag.DefValue != "50" { + t.Errorf("expected default value 50, got %s", flag.DefValue) + } + + // Test flag parsing + args := []string{"--concurrent-device-taint-eviction-syncs=25"} + if err := fs.Parse(args); err != nil { + t.Errorf("failed to parse flags: %v", err) + } + + if opts.ConcurrentSyncs != 25 { + t.Errorf("expected ConcurrentSyncs to be 25, got %d", opts.ConcurrentSyncs) + } +} + +func TestDeviceTaintEvictionControllerOptions_AddFlags_Nil(t *testing.T) { + fs := pflag.NewFlagSet("test", pflag.ContinueOnError) + var opts *DeviceTaintEvictionControllerOptions + + // Should not panic when options is nil + opts.AddFlags(fs) + + // Flag should not be added + flag := fs.Lookup("concurrent-device-taint-eviction-syncs") + if flag != nil { + t.Error("concurrent-device-taint-eviction-syncs flag should not be added when options is nil") + } +} + +func TestDeviceTaintEvictionControllerOptions_ApplyTo(t *testing.T) { + opts := &DeviceTaintEvictionControllerOptions{ + &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{ + ConcurrentSyncs: 75, + }, + } + + cfg := &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{} + + err := 
opts.ApplyTo(cfg) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + if cfg.ConcurrentSyncs != 75 { + t.Errorf("expected ConcurrentSyncs to be 75, got %d", cfg.ConcurrentSyncs) + } +} + +func TestDeviceTaintEvictionControllerOptions_ApplyTo_Nil(t *testing.T) { + var opts *DeviceTaintEvictionControllerOptions + cfg := &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{ + ConcurrentSyncs: 50, + } + + err := opts.ApplyTo(cfg) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + + // Configuration should remain unchanged + if cfg.ConcurrentSyncs != 50 { + t.Errorf("expected ConcurrentSyncs to remain 50, got %d", cfg.ConcurrentSyncs) + } +} + +func TestDeviceTaintEvictionControllerOptions_Validate(t *testing.T) { + testCases := []struct { + name string + concurrentSyncs int32 + expectErrors bool + expectedErrorSubString string + }{ + { + name: "valid concurrent syncs", + concurrentSyncs: 50, + expectErrors: false, + }, + { + name: "valid minimum concurrent syncs", + concurrentSyncs: 1, + expectErrors: false, + }, + { + name: "invalid zero concurrent syncs", + concurrentSyncs: 0, + expectErrors: true, + expectedErrorSubString: "concurrent-device-taint-eviction-syncs must be greater than zero, got 0", + }, + { + name: "invalid negative concurrent syncs", + concurrentSyncs: -5, + expectErrors: true, + expectedErrorSubString: "concurrent-device-taint-eviction-syncs must be greater than zero, got -5", + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + opts := &DeviceTaintEvictionControllerOptions{ + &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{ + ConcurrentSyncs: tc.concurrentSyncs, + }, + } + + errs := opts.Validate() + + if tc.expectErrors && len(errs) == 0 { + t.Error("expected validation errors, but got none") + } + + if !tc.expectErrors && len(errs) > 0 { + t.Errorf("expected no validation errors, but got: %v", errs) + } + + if tc.expectErrors && len(errs) > 0 { + 
gotErr := utilerrors.NewAggregate(errs).Error() + if !strings.Contains(gotErr, tc.expectedErrorSubString) { + t.Errorf("expected error to contain %q, but got %q", tc.expectedErrorSubString, gotErr) + } + } + }) + } +} + +func TestDeviceTaintEvictionControllerOptions_Validate_Nil(t *testing.T) { + var opts *DeviceTaintEvictionControllerOptions + + errs := opts.Validate() + if len(errs) != 0 { + t.Errorf("expected no validation errors for nil options, but got: %v", errs) + } +} + +func TestDeviceTaintEvictionControllerOptions_Integration(t *testing.T) { + // Test the complete workflow: create options, set flags, apply to config + fs := pflag.NewFlagSet("test", pflag.ContinueOnError) + opts := &DeviceTaintEvictionControllerOptions{ + &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{ + ConcurrentSyncs: 50, + }, + } + + // Add flags + opts.AddFlags(fs) + + // Parse flags with custom value + args := []string{"--concurrent-device-taint-eviction-syncs=100"} + if err := fs.Parse(args); err != nil { + t.Fatalf("failed to parse flags: %v", err) + } + + // Validate + errs := opts.Validate() + if len(errs) > 0 { + t.Fatalf("validation failed: %v", errs) + } + + // Apply to config + cfg := &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{} + if err := opts.ApplyTo(cfg); err != nil { + t.Fatalf("failed to apply options: %v", err) + } + + // Verify final configuration + expected := &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{ + ConcurrentSyncs: 100, + } + + if !reflect.DeepEqual(cfg, expected) { + t.Errorf("expected config %+v, got %+v", expected, cfg) + } +} diff --git a/cmd/kube-controller-manager/app/options/options.go b/cmd/kube-controller-manager/app/options/options.go index c599ff58bd6..b6767c067a5 100644 --- a/cmd/kube-controller-manager/app/options/options.go +++ b/cmd/kube-controller-manager/app/options/options.go @@ -77,6 +77,7 @@ type KubeControllerManagerOptions struct { CSRSigningController 
*CSRSigningControllerOptions DaemonSetController *DaemonSetControllerOptions DeploymentController *DeploymentControllerOptions + DeviceTaintEvictionController *DeviceTaintEvictionControllerOptions StatefulSetController *StatefulSetControllerOptions DeprecatedFlags *DeprecatedControllerOptions EndpointController *EndpointControllerOptions @@ -151,6 +152,9 @@ func NewKubeControllerManagerOptions() (*KubeControllerManagerOptions, error) { DeploymentController: &DeploymentControllerOptions{ &componentConfig.DeploymentController, }, + DeviceTaintEvictionController: &DeviceTaintEvictionControllerOptions{ + &componentConfig.DeviceTaintEvictionController, + }, StatefulSetController: &StatefulSetControllerOptions{ &componentConfig.StatefulSetController, }, @@ -272,6 +276,7 @@ func (s *KubeControllerManagerOptions) Flags(allControllers []string, disabledBy s.AttachDetachController.AddFlags(fss.FlagSet(names.PersistentVolumeAttachDetachController)) s.CSRSigningController.AddFlags(fss.FlagSet(names.CertificateSigningRequestSigningController)) s.DeploymentController.AddFlags(fss.FlagSet(names.DeploymentController)) + s.DeviceTaintEvictionController.AddFlags(fss.FlagSet(names.DeviceTaintEvictionController)) s.StatefulSetController.AddFlags(fss.FlagSet(names.StatefulSetController)) s.DaemonSetController.AddFlags(fss.FlagSet(names.DaemonSetController)) s.DeprecatedFlags.AddFlags(fss.FlagSet("deprecated")) @@ -341,6 +346,9 @@ func (s *KubeControllerManagerOptions) ApplyTo(c *kubecontrollerconfig.Config, a if err := s.DeploymentController.ApplyTo(&c.ComponentConfig.DeploymentController); err != nil { return err } + if err := s.DeviceTaintEvictionController.ApplyTo(&c.ComponentConfig.DeviceTaintEvictionController); err != nil { + return err + } if err := s.StatefulSetController.ApplyTo(&c.ComponentConfig.StatefulSetController); err != nil { return err } @@ -440,6 +448,7 @@ func (s *KubeControllerManagerOptions) Validate(allControllers []string, disable errs = append(errs, 
s.CSRSigningController.Validate()...) errs = append(errs, s.DaemonSetController.Validate()...) errs = append(errs, s.DeploymentController.Validate()...) + errs = append(errs, s.DeviceTaintEvictionController.Validate()...) errs = append(errs, s.StatefulSetController.Validate()...) errs = append(errs, s.DeprecatedFlags.Validate()...) errs = append(errs, s.EndpointController.Validate()...) diff --git a/cmd/kube-controller-manager/app/options/options_test.go b/cmd/kube-controller-manager/app/options/options_test.go index 4773fd925b3..d9311700832 100644 --- a/cmd/kube-controller-manager/app/options/options_test.go +++ b/cmd/kube-controller-manager/app/options/options_test.go @@ -56,6 +56,7 @@ import ( cronjobconfig "k8s.io/kubernetes/pkg/controller/cronjob/config" daemonconfig "k8s.io/kubernetes/pkg/controller/daemon/config" deploymentconfig "k8s.io/kubernetes/pkg/controller/deployment/config" + devicetaintevictionconfig "k8s.io/kubernetes/pkg/controller/devicetainteviction/config" endpointconfig "k8s.io/kubernetes/pkg/controller/endpoint/config" endpointsliceconfig "k8s.io/kubernetes/pkg/controller/endpointslice/config" endpointslicemirroringconfig "k8s.io/kubernetes/pkg/controller/endpointslicemirroring/config" @@ -98,6 +99,7 @@ var args = []string{ "--cluster-signing-legacy-unknown-cert-file=/cluster-signing-legacy-unknown/cert-file", "--cluster-signing-legacy-unknown-key-file=/cluster-signing-legacy-unknown/key-file", "--concurrent-deployment-syncs=10", + "--concurrent-device-taint-eviction-syncs=10", "--concurrent-daemonset-syncs=10", "--concurrent-horizontal-pod-autoscaler-syncs=10", "--concurrent-statefulset-syncs=15", @@ -273,6 +275,11 @@ func TestAddFlags(t *testing.T) { ConcurrentDeploymentSyncs: 10, }, }, + DeviceTaintEvictionController: &DeviceTaintEvictionControllerOptions{ + &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{ + ConcurrentSyncs: 10, + }, + }, StatefulSetController: &StatefulSetControllerOptions{ 
&statefulsetconfig.StatefulSetControllerConfiguration{ ConcurrentStatefulSetSyncs: 15, @@ -624,6 +631,9 @@ func TestApplyTo(t *testing.T) { DeploymentController: deploymentconfig.DeploymentControllerConfiguration{ ConcurrentDeploymentSyncs: 10, }, + DeviceTaintEvictionController: devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{ + ConcurrentSyncs: 10, + }, StatefulSetController: statefulsetconfig.StatefulSetControllerConfiguration{ ConcurrentStatefulSetSyncs: 15, }, @@ -1262,6 +1272,15 @@ func TestValidateControllersOptions(t *testing.T) { }, }, }, + { + name: "DeviceTaintEvictionControllerOptions", + expectErrors: false, + options: &DeviceTaintEvictionControllerOptions{ + &devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration{ + ConcurrentSyncs: 10, + }, + }, + }, { name: "DeprecatedControllerOptions", expectErrors: false, diff --git a/pkg/controller/apis/config/types.go b/pkg/controller/apis/config/types.go index 880a4fd4928..3be7f98a5c4 100644 --- a/pkg/controller/apis/config/types.go +++ b/pkg/controller/apis/config/types.go @@ -25,6 +25,7 @@ import ( cronjobconfig "k8s.io/kubernetes/pkg/controller/cronjob/config" daemonconfig "k8s.io/kubernetes/pkg/controller/daemon/config" deploymentconfig "k8s.io/kubernetes/pkg/controller/deployment/config" + devicetaintevictionconfig "k8s.io/kubernetes/pkg/controller/devicetainteviction/config" endpointconfig "k8s.io/kubernetes/pkg/controller/endpoint/config" endpointsliceconfig "k8s.io/kubernetes/pkg/controller/endpointslice/config" endpointslicemirroringconfig "k8s.io/kubernetes/pkg/controller/endpointslicemirroring/config" @@ -62,6 +63,9 @@ type KubeControllerManagerConfiguration struct { // AttachDetachControllerConfiguration holds configuration for // AttachDetachController related features. AttachDetachController attachdetachconfig.AttachDetachControllerConfiguration + // CronJobControllerConfiguration holds configuration for CronJobController + // related features. 
+ CronJobController cronjobconfig.CronJobControllerConfiguration // CSRSigningControllerConfiguration holds configuration for // CSRSigningController related features. CSRSigningController csrsigningconfig.CSRSigningControllerConfiguration @@ -71,9 +75,8 @@ type KubeControllerManagerConfiguration struct { // DeploymentControllerConfiguration holds configuration for // DeploymentController related features. DeploymentController deploymentconfig.DeploymentControllerConfiguration - // StatefulSetControllerConfiguration holds configuration for - // StatefulSetController related features. - StatefulSetController statefulsetconfig.StatefulSetControllerConfiguration + // DeviceTaintEvictionControllerConfiguration contains elements configuring the device taint eviction controller. + DeviceTaintEvictionController devicetaintevictionconfig.DeviceTaintEvictionControllerConfiguration // DeprecatedControllerConfiguration holds configuration for some deprecated // features. DeprecatedController DeprecatedControllerConfiguration @@ -96,9 +99,6 @@ type KubeControllerManagerConfiguration struct { HPAController poautosclerconfig.HPAControllerConfiguration // JobControllerConfiguration holds configuration for JobController related features. JobController jobconfig.JobControllerConfiguration - // CronJobControllerConfiguration holds configuration for CronJobController - // related features. - CronJobController cronjobconfig.CronJobControllerConfiguration // LegacySATokenCleanerConfiguration holds configuration for LegacySATokenCleaner related features. LegacySATokenCleaner serviceaccountconfig.LegacySATokenCleanerConfiguration // NamespaceControllerConfiguration holds configuration for NamespaceController @@ -130,6 +130,9 @@ type KubeControllerManagerConfiguration struct { // ServiceControllerConfiguration holds configuration for ServiceController // related features. 
ServiceController serviceconfig.ServiceControllerConfiguration + // StatefulSetControllerConfiguration holds configuration for + // StatefulSetController related features. + StatefulSetController statefulsetconfig.StatefulSetControllerConfiguration // TTLAfterFinishedControllerConfiguration holds configuration for // TTLAfterFinishedController related features. TTLAfterFinishedController ttlafterfinishedconfig.TTLAfterFinishedControllerConfiguration diff --git a/pkg/controller/apis/config/v1alpha1/defaults.go b/pkg/controller/apis/config/v1alpha1/defaults.go index c5a6ccef62f..d0d4fd160ae 100644 --- a/pkg/controller/apis/config/v1alpha1/defaults.go +++ b/pkg/controller/apis/config/v1alpha1/defaults.go @@ -25,6 +25,7 @@ import ( cronjobconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/cronjob/config/v1alpha1" daemonconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/daemon/config/v1alpha1" deploymentconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/deployment/config/v1alpha1" + devicetaintevictionconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/devicetainteviction/config/v1alpha1" endpointconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpoint/config/v1alpha1" endpointsliceconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpointslice/config/v1alpha1" endpointslicemirroringconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpointslicemirroring/config/v1alpha1" @@ -71,6 +72,8 @@ func SetDefaults_KubeControllerManagerConfiguration(obj *kubectrlmgrconfigv1alph daemonconfigv1alpha1.RecommendedDefaultDaemonSetControllerConfiguration(&obj.DaemonSetController) // Use the default RecommendedDefaultDeploymentControllerConfiguration options deploymentconfigv1alpha1.RecommendedDefaultDeploymentControllerConfiguration(&obj.DeploymentController) + // Use the default RecommendedDefaultDeviceTaintEvictionControllerConfiguration options + devicetaintevictionconfigv1alpha1.RecommendedDefaultDeviceTaintEvictionControllerConfiguration(&obj.DeviceTaintEvictionController) // Use the 
default RecommendedDefaultStatefulSetControllerConfiguration options statefulsetconfigv1alpha1.RecommendedDefaultStatefulSetControllerConfiguration(&obj.StatefulSetController) // Use the default RecommendedDefaultEndpointControllerConfiguration options diff --git a/pkg/controller/devicetainteviction/config/doc.go b/pkg/controller/devicetainteviction/config/doc.go new file mode 100644 index 00000000000..e605045a401 --- /dev/null +++ b/pkg/controller/devicetainteviction/config/doc.go @@ -0,0 +1,19 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// +k8s:deepcopy-gen=package + +package config diff --git a/pkg/controller/devicetainteviction/config/types.go b/pkg/controller/devicetainteviction/config/types.go new file mode 100644 index 00000000000..8bc0544ae0e --- /dev/null +++ b/pkg/controller/devicetainteviction/config/types.go @@ -0,0 +1,26 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package config + +// DeviceTaintEvictionControllerConfiguration contains elements configuring the device taint eviction controller. +type DeviceTaintEvictionControllerConfiguration struct { + // ConcurrentSyncs is the number of operations (deleting a pod, updating a ResourceClaim status, etc.) + // that will be done concurrently. Larger number = faster processing, but more CPU (and network) load. + // + // The default is 8. + ConcurrentSyncs int32 +} diff --git a/pkg/controller/devicetainteviction/config/v1alpha1/conversion.go b/pkg/controller/devicetainteviction/config/v1alpha1/conversion.go new file mode 100644 index 00000000000..6b441c434c1 --- /dev/null +++ b/pkg/controller/devicetainteviction/config/v1alpha1/conversion.go @@ -0,0 +1,40 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/conversion" + "k8s.io/kube-controller-manager/config/v1alpha1" + "k8s.io/kubernetes/pkg/controller/devicetainteviction/config" +) + +// Important! The public back-and-forth conversion functions for the types in this package +// with DeviceTaintEvictionControllerConfiguration types need to be manually exposed like this in order for +// other packages that reference this package to be able to call these conversion functions +// in an autogenerated manner. +// TODO: Fix the bug in conversion-gen so it automatically discovers these Convert_* functions +// in autogenerated code as well. 
+ +// Convert_v1alpha1_DeviceTaintEvictionControllerConfiguration_To_config_DeviceTaintEvictionControllerConfiguration is an autogenerated conversion function. +func Convert_v1alpha1_DeviceTaintEvictionControllerConfiguration_To_config_DeviceTaintEvictionControllerConfiguration(in *v1alpha1.DeviceTaintEvictionControllerConfiguration, out *config.DeviceTaintEvictionControllerConfiguration, s conversion.Scope) error { + return autoConvert_v1alpha1_DeviceTaintEvictionControllerConfiguration_To_config_DeviceTaintEvictionControllerConfiguration(in, out, s) +} + +// Convert_config_DeviceTaintEvictionControllerConfiguration_To_v1alpha1_DeviceTaintEvictionControllerConfiguration is an autogenerated conversion function. +func Convert_config_DeviceTaintEvictionControllerConfiguration_To_v1alpha1_DeviceTaintEvictionControllerConfiguration(in *config.DeviceTaintEvictionControllerConfiguration, out *v1alpha1.DeviceTaintEvictionControllerConfiguration, s conversion.Scope) error { + return autoConvert_config_DeviceTaintEvictionControllerConfiguration_To_v1alpha1_DeviceTaintEvictionControllerConfiguration(in, out, s) +} diff --git a/pkg/controller/devicetainteviction/config/v1alpha1/defaults.go b/pkg/controller/devicetainteviction/config/v1alpha1/defaults.go new file mode 100644 index 00000000000..e9295e561f7 --- /dev/null +++ b/pkg/controller/devicetainteviction/config/v1alpha1/defaults.go @@ -0,0 +1,41 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1alpha1 + +import ( + kubectrlmgrconfigv1alpha1 "k8s.io/kube-controller-manager/config/v1alpha1" +) + +// RecommendedDefaultDeviceTaintEvictionControllerConfiguration defaults a pointer to a +// DeviceTaintEvictionControllerConfiguration struct. This will set the recommended default +// values, but they may be subject to change between API versions. This function +// is intentionally not registered in the scheme as a "normal" `SetDefaults_Foo` +// function to allow consumers of this type to set whatever defaults for their +// embedded configs. Forcing consumers to use these defaults would be problematic +// as defaulting in the scheme is done as part of the conversion, and there would +// be no easy way to opt-out. Instead, if you want to use this defaulting method +// run it in your wrapper struct of this type in its `SetDefaults_` method. +func RecommendedDefaultDeviceTaintEvictionControllerConfiguration(obj *kubectrlmgrconfigv1alpha1.DeviceTaintEvictionControllerConfiguration) { + if obj.ConcurrentSyncs == 0 { + // This is a compromise between getting work done and not overwhelming the apiserver + // and pod informers. Integration testing with 100 workers modified pods so quickly + // that a watch in the integration test couldn't keep up: + // cacher.go:855] cacher (pods): 100 objects queued in incoming channel. + // cache_watcher.go:203] Forcing pods watcher close due to unresponsiveness: key: "/pods/", labels: "", fields: "". len(c.input) = 10, len(c.result) = 10, graceful = false + obj.ConcurrentSyncs = 8 + } +} diff --git a/pkg/controller/devicetainteviction/config/v1alpha1/defaults_test.go b/pkg/controller/devicetainteviction/config/v1alpha1/defaults_test.go new file mode 100644 index 00000000000..7f07f608407 --- /dev/null +++ b/pkg/controller/devicetainteviction/config/v1alpha1/defaults_test.go @@ -0,0 +1,31 @@ +/* +Copyright The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "testing" + + kubectrlmgrconfigv1alpha1 "k8s.io/kube-controller-manager/config/v1alpha1" +) + +func TestRecommendedDefaultDeviceTaintEvictionControllerConfiguration(t *testing.T) { + config := new(kubectrlmgrconfigv1alpha1.DeviceTaintEvictionControllerConfiguration) + RecommendedDefaultDeviceTaintEvictionControllerConfiguration(config) + if config.ConcurrentSyncs != 8 { + t.Errorf("incorrect default value, expected 8 but got %v", config.ConcurrentSyncs) + } +} diff --git a/pkg/controller/devicetainteviction/config/v1alpha1/doc.go b/pkg/controller/devicetainteviction/config/v1alpha1/doc.go new file mode 100644 index 00000000000..67b68078375 --- /dev/null +++ b/pkg/controller/devicetainteviction/config/v1alpha1/doc.go @@ -0,0 +1,21 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +// +k8s:deepcopy-gen=package +// +k8s:conversion-gen=k8s.io/kubernetes/pkg/controller/devicetainteviction/config +// +k8s:conversion-gen-external-types=k8s.io/kube-controller-manager/config/v1alpha1 + +package v1alpha1 diff --git a/pkg/controller/devicetainteviction/config/v1alpha1/register.go b/pkg/controller/devicetainteviction/config/v1alpha1/register.go new file mode 100644 index 00000000000..85bbf3effe4 --- /dev/null +++ b/pkg/controller/devicetainteviction/config/v1alpha1/register.go @@ -0,0 +1,31 @@ +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import ( + "k8s.io/apimachinery/pkg/runtime" +) + +var ( + // SchemeBuilder is the scheme builder with scheme init functions to run for this API package + SchemeBuilder runtime.SchemeBuilder + // localSchemeBuilder extends the SchemeBuilder instance with the external types. In this package, + // defaulting and conversion init funcs are registered as well. 
+ localSchemeBuilder = &SchemeBuilder + // AddToScheme is a global function that registers this API group & version to a scheme + AddToScheme = localSchemeBuilder.AddToScheme +) diff --git a/pkg/controller/devicetainteviction/device_taint_eviction.go b/pkg/controller/devicetainteviction/device_taint_eviction.go index bff689e108e..e3cbe35736d 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction.go @@ -57,15 +57,6 @@ import ( utilpod "k8s.io/kubernetes/pkg/util/pod" ) -const ( - // This is a compromise between getting work done and not overwhelming the apiserver - // and pod informers. Integration testing with 100 workers modified pods so quickly - // that a watch in the integration test couldn't keep up: - // cacher.go:855] cacher (pods): 100 objects queued in incoming channel. - // cache_watcher.go:203] Forcing pods watcher close due to unresponsiveness: key: "/pods/", labels: "", fields: "". len(c.input) = 10, len(c.result) = 10, graceful = false - numWorkers = 10 -) - // Controller listens to Taint changes of DRA devices and Toleration changes of ResourceClaims, // then deletes Pods which use ResourceClaims that don't tolerate a NoExecute taint. // Pods which have already reached a final state (aka terminated) don't need to be deleted. @@ -383,7 +374,7 @@ func New(c clientset.Interface, podInformer coreinformers.PodInformer, claimInfo // Run starts the controller which will run until the context is done. // An error is returned for startup problems. 
-func (tc *Controller) Run(ctx context.Context) error { +func (tc *Controller) Run(ctx context.Context, numWorkers int) error { defer utilruntime.HandleCrash() logger := klog.FromContext(ctx) logger.Info("Starting", "controller", tc.name) diff --git a/pkg/controller/devicetainteviction/device_taint_eviction_test.go b/pkg/controller/devicetainteviction/device_taint_eviction_test.go index fdf616d0d75..69834b36a1d 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction_test.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction_test.go @@ -1509,7 +1509,7 @@ func TestEviction(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") + assert.NoError(tCtx, controller.Run(tCtx, 10 /* workers */), "eviction controller failed") }() // Eventually the controller should have synced it's informers. @@ -1659,7 +1659,7 @@ func testCancelEviction(tCtx ktesting.TContext, deletePod bool) { wg.Add(1) go func() { defer wg.Done() - assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") + assert.NoError(tCtx, controller.Run(tCtx, 10 /* workers */), "eviction controller failed") }() // Eventually the pod gets scheduled for eviction. @@ -1772,7 +1772,7 @@ func TestParallelPodDeletion(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") + assert.NoError(tCtx, controller.Run(tCtx, 10 /* workers */), "eviction controller failed") }() // Eventually the pod gets deleted, in this test by us. @@ -1846,7 +1846,7 @@ func TestRetry(t *testing.T) { wg.Add(1) go func() { defer wg.Done() - assert.NoError(tCtx, controller.Run(tCtx), "eviction controller failed") + assert.NoError(tCtx, controller.Run(tCtx, 10 /* workers */), "eviction controller failed") }() // Eventually the pod gets deleted and the event is recorded. 
From 0689b628c7da93bae8cea786f6b69330ef661290 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Wed, 29 Oct 2025 07:39:08 +0100 Subject: [PATCH 09/11] generated files --- .../v1alpha1/zz_generated.conversion.go | 15 ++- .../apis/config/zz_generated.deepcopy.go | 5 +- .../v1alpha1/zz_generated.conversion.go | 92 +++++++++++++++++++ .../config/v1alpha1/zz_generated.deepcopy.go | 22 +++++ .../config/zz_generated.deepcopy.go | 38 ++++++++ pkg/generated/openapi/zz_generated.openapi.go | 34 ++++++- .../config/v1alpha1/types.go | 11 +++ .../config/v1alpha1/zz_generated.deepcopy.go | 17 ++++ .../v1alpha1/zz_generated.model_name.go | 5 + 9 files changed, 231 insertions(+), 8 deletions(-) create mode 100644 pkg/controller/devicetainteviction/config/v1alpha1/zz_generated.conversion.go create mode 100644 pkg/controller/devicetainteviction/config/v1alpha1/zz_generated.deepcopy.go create mode 100644 pkg/controller/devicetainteviction/config/zz_generated.deepcopy.go diff --git a/pkg/controller/apis/config/v1alpha1/zz_generated.conversion.go b/pkg/controller/apis/config/v1alpha1/zz_generated.conversion.go index 383177ba914..b9b276389aa 100644 --- a/pkg/controller/apis/config/v1alpha1/zz_generated.conversion.go +++ b/pkg/controller/apis/config/v1alpha1/zz_generated.conversion.go @@ -34,6 +34,7 @@ import ( cronjobconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/cronjob/config/v1alpha1" daemonconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/daemon/config/v1alpha1" deploymentconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/deployment/config/v1alpha1" + devicetaintevictionconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/devicetainteviction/config/v1alpha1" endpointconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpoint/config/v1alpha1" endpointsliceconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpointslice/config/v1alpha1" endpointslicemirroringconfigv1alpha1 "k8s.io/kubernetes/pkg/controller/endpointslicemirroring/config/v1alpha1" @@ -224,6 +225,9 @@ func 
autoConvert_v1alpha1_KubeControllerManagerConfiguration_To_config_KubeContr if err := validatingadmissionpolicystatusconfigv1alpha1.Convert_v1alpha1_ValidatingAdmissionPolicyStatusControllerConfiguration_To_config_ValidatingAdmissionPolicyStatusControllerConfiguration(&in.ValidatingAdmissionPolicyStatusController, &out.ValidatingAdmissionPolicyStatusController, s); err != nil { return err } + if err := devicetaintevictionconfigv1alpha1.Convert_v1alpha1_DeviceTaintEvictionControllerConfiguration_To_config_DeviceTaintEvictionControllerConfiguration(&in.DeviceTaintEvictionController, &out.DeviceTaintEvictionController, s); err != nil { + return err + } return nil } @@ -242,6 +246,9 @@ func autoConvert_config_KubeControllerManagerConfiguration_To_v1alpha1_KubeContr if err := attachdetachconfigv1alpha1.Convert_config_AttachDetachControllerConfiguration_To_v1alpha1_AttachDetachControllerConfiguration(&in.AttachDetachController, &out.AttachDetachController, s); err != nil { return err } + if err := cronjobconfigv1alpha1.Convert_config_CronJobControllerConfiguration_To_v1alpha1_CronJobControllerConfiguration(&in.CronJobController, &out.CronJobController, s); err != nil { + return err + } if err := signerconfigv1alpha1.Convert_config_CSRSigningControllerConfiguration_To_v1alpha1_CSRSigningControllerConfiguration(&in.CSRSigningController, &out.CSRSigningController, s); err != nil { return err } @@ -251,7 +258,7 @@ func autoConvert_config_KubeControllerManagerConfiguration_To_v1alpha1_KubeContr if err := deploymentconfigv1alpha1.Convert_config_DeploymentControllerConfiguration_To_v1alpha1_DeploymentControllerConfiguration(&in.DeploymentController, &out.DeploymentController, s); err != nil { return err } - if err := statefulsetconfigv1alpha1.Convert_config_StatefulSetControllerConfiguration_To_v1alpha1_StatefulSetControllerConfiguration(&in.StatefulSetController, &out.StatefulSetController, s); err != nil { + if err := 
devicetaintevictionconfigv1alpha1.Convert_config_DeviceTaintEvictionControllerConfiguration_To_v1alpha1_DeviceTaintEvictionControllerConfiguration(&in.DeviceTaintEvictionController, &out.DeviceTaintEvictionController, s); err != nil { return err } if err := Convert_config_DeprecatedControllerConfiguration_To_v1alpha1_DeprecatedControllerConfiguration(&in.DeprecatedController, &out.DeprecatedController, s); err != nil { @@ -278,9 +285,6 @@ func autoConvert_config_KubeControllerManagerConfiguration_To_v1alpha1_KubeContr if err := jobconfigv1alpha1.Convert_config_JobControllerConfiguration_To_v1alpha1_JobControllerConfiguration(&in.JobController, &out.JobController, s); err != nil { return err } - if err := cronjobconfigv1alpha1.Convert_config_CronJobControllerConfiguration_To_v1alpha1_CronJobControllerConfiguration(&in.CronJobController, &out.CronJobController, s); err != nil { - return err - } if err := serviceaccountconfigv1alpha1.Convert_config_LegacySATokenCleanerConfiguration_To_v1alpha1_LegacySATokenCleanerConfiguration(&in.LegacySATokenCleaner, &out.LegacySATokenCleaner, s); err != nil { return err } @@ -314,6 +318,9 @@ func autoConvert_config_KubeControllerManagerConfiguration_To_v1alpha1_KubeContr if err := serviceconfigv1alpha1.Convert_config_ServiceControllerConfiguration_To_v1alpha1_ServiceControllerConfiguration(&in.ServiceController, &out.ServiceController, s); err != nil { return err } + if err := statefulsetconfigv1alpha1.Convert_config_StatefulSetControllerConfiguration_To_v1alpha1_StatefulSetControllerConfiguration(&in.StatefulSetController, &out.StatefulSetController, s); err != nil { + return err + } if err := ttlafterfinishedconfigv1alpha1.Convert_config_TTLAfterFinishedControllerConfiguration_To_v1alpha1_TTLAfterFinishedControllerConfiguration(&in.TTLAfterFinishedController, &out.TTLAfterFinishedController, s); err != nil { return err } diff --git a/pkg/controller/apis/config/zz_generated.deepcopy.go 
b/pkg/controller/apis/config/zz_generated.deepcopy.go index 0393df7d99e..82d905bbe21 100644 --- a/pkg/controller/apis/config/zz_generated.deepcopy.go +++ b/pkg/controller/apis/config/zz_generated.deepcopy.go @@ -48,10 +48,11 @@ func (in *KubeControllerManagerConfiguration) DeepCopyInto(out *KubeControllerMa in.Generic.DeepCopyInto(&out.Generic) out.KubeCloudShared = in.KubeCloudShared out.AttachDetachController = in.AttachDetachController + out.CronJobController = in.CronJobController out.CSRSigningController = in.CSRSigningController out.DaemonSetController = in.DaemonSetController out.DeploymentController = in.DeploymentController - out.StatefulSetController = in.StatefulSetController + out.DeviceTaintEvictionController = in.DeviceTaintEvictionController out.DeprecatedController = in.DeprecatedController out.EndpointController = in.EndpointController out.EndpointSliceController = in.EndpointSliceController @@ -60,7 +61,6 @@ func (in *KubeControllerManagerConfiguration) DeepCopyInto(out *KubeControllerMa in.GarbageCollectorController.DeepCopyInto(&out.GarbageCollectorController) out.HPAController = in.HPAController out.JobController = in.JobController - out.CronJobController = in.CronJobController out.LegacySATokenCleaner = in.LegacySATokenCleaner out.NamespaceController = in.NamespaceController out.NodeIPAMController = in.NodeIPAMController @@ -72,6 +72,7 @@ func (in *KubeControllerManagerConfiguration) DeepCopyInto(out *KubeControllerMa out.ResourceQuotaController = in.ResourceQuotaController out.SAController = in.SAController out.ServiceController = in.ServiceController + out.StatefulSetController = in.StatefulSetController out.TTLAfterFinishedController = in.TTLAfterFinishedController out.ValidatingAdmissionPolicyStatusController = in.ValidatingAdmissionPolicyStatusController return diff --git a/pkg/controller/devicetainteviction/config/v1alpha1/zz_generated.conversion.go b/pkg/controller/devicetainteviction/config/v1alpha1/zz_generated.conversion.go new file 
mode 100644 index 00000000000..ec6654aa75d --- /dev/null +++ b/pkg/controller/devicetainteviction/config/v1alpha1/zz_generated.conversion.go @@ -0,0 +1,92 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by conversion-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" + conversion "k8s.io/apimachinery/pkg/conversion" + runtime "k8s.io/apimachinery/pkg/runtime" + configv1alpha1 "k8s.io/kube-controller-manager/config/v1alpha1" + config "k8s.io/kubernetes/pkg/controller/devicetainteviction/config" +) + +func init() { + localSchemeBuilder.Register(RegisterConversions) +} + +// RegisterConversions adds conversion functions to the given scheme. +// Public to allow building arbitrary schemes. 
+func RegisterConversions(s *runtime.Scheme) error { + if err := s.AddGeneratedConversionFunc((*configv1alpha1.GroupResource)(nil), (*v1.GroupResource)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_GroupResource_To_v1_GroupResource(a.(*configv1alpha1.GroupResource), b.(*v1.GroupResource), scope) + }); err != nil { + return err + } + if err := s.AddGeneratedConversionFunc((*v1.GroupResource)(nil), (*configv1alpha1.GroupResource)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1_GroupResource_To_v1alpha1_GroupResource(a.(*v1.GroupResource), b.(*configv1alpha1.GroupResource), scope) + }); err != nil { + return err + } + if err := s.AddConversionFunc((*config.DeviceTaintEvictionControllerConfiguration)(nil), (*configv1alpha1.DeviceTaintEvictionControllerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_config_DeviceTaintEvictionControllerConfiguration_To_v1alpha1_DeviceTaintEvictionControllerConfiguration(a.(*config.DeviceTaintEvictionControllerConfiguration), b.(*configv1alpha1.DeviceTaintEvictionControllerConfiguration), scope) + }); err != nil { + return err + } + if err := s.AddConversionFunc((*configv1alpha1.DeviceTaintEvictionControllerConfiguration)(nil), (*config.DeviceTaintEvictionControllerConfiguration)(nil), func(a, b interface{}, scope conversion.Scope) error { + return Convert_v1alpha1_DeviceTaintEvictionControllerConfiguration_To_config_DeviceTaintEvictionControllerConfiguration(a.(*configv1alpha1.DeviceTaintEvictionControllerConfiguration), b.(*config.DeviceTaintEvictionControllerConfiguration), scope) + }); err != nil { + return err + } + return nil +} + +func autoConvert_v1alpha1_DeviceTaintEvictionControllerConfiguration_To_config_DeviceTaintEvictionControllerConfiguration(in *configv1alpha1.DeviceTaintEvictionControllerConfiguration, out *config.DeviceTaintEvictionControllerConfiguration, s conversion.Scope) error { + 
out.ConcurrentSyncs = in.ConcurrentSyncs + return nil +} + +func autoConvert_config_DeviceTaintEvictionControllerConfiguration_To_v1alpha1_DeviceTaintEvictionControllerConfiguration(in *config.DeviceTaintEvictionControllerConfiguration, out *configv1alpha1.DeviceTaintEvictionControllerConfiguration, s conversion.Scope) error { + out.ConcurrentSyncs = in.ConcurrentSyncs + return nil +} + +func autoConvert_v1alpha1_GroupResource_To_v1_GroupResource(in *configv1alpha1.GroupResource, out *v1.GroupResource, s conversion.Scope) error { + out.Group = in.Group + out.Resource = in.Resource + return nil +} + +// Convert_v1alpha1_GroupResource_To_v1_GroupResource is an autogenerated conversion function. +func Convert_v1alpha1_GroupResource_To_v1_GroupResource(in *configv1alpha1.GroupResource, out *v1.GroupResource, s conversion.Scope) error { + return autoConvert_v1alpha1_GroupResource_To_v1_GroupResource(in, out, s) +} + +func autoConvert_v1_GroupResource_To_v1alpha1_GroupResource(in *v1.GroupResource, out *configv1alpha1.GroupResource, s conversion.Scope) error { + out.Group = in.Group + out.Resource = in.Resource + return nil +} + +// Convert_v1_GroupResource_To_v1alpha1_GroupResource is an autogenerated conversion function. +func Convert_v1_GroupResource_To_v1alpha1_GroupResource(in *v1.GroupResource, out *configv1alpha1.GroupResource, s conversion.Scope) error { + return autoConvert_v1_GroupResource_To_v1alpha1_GroupResource(in, out, s) +} diff --git a/pkg/controller/devicetainteviction/config/v1alpha1/zz_generated.deepcopy.go b/pkg/controller/devicetainteviction/config/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 00000000000..61f6555edfc --- /dev/null +++ b/pkg/controller/devicetainteviction/config/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,22 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright The Kubernetes Authors. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by deepcopy-gen. DO NOT EDIT. + +package v1alpha1 diff --git a/pkg/controller/devicetainteviction/config/zz_generated.deepcopy.go b/pkg/controller/devicetainteviction/config/zz_generated.deepcopy.go new file mode 100644 index 00000000000..a42fadcd4a3 --- /dev/null +++ b/pkg/controller/devicetainteviction/config/zz_generated.deepcopy.go @@ -0,0 +1,38 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by deepcopy-gen. DO NOT EDIT. + +package config + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *DeviceTaintEvictionControllerConfiguration) DeepCopyInto(out *DeviceTaintEvictionControllerConfiguration) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeviceTaintEvictionControllerConfiguration. +func (in *DeviceTaintEvictionControllerConfiguration) DeepCopy() *DeviceTaintEvictionControllerConfiguration { + if in == nil { + return nil + } + out := new(DeviceTaintEvictionControllerConfiguration) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/generated/openapi/zz_generated.openapi.go b/pkg/generated/openapi/zz_generated.openapi.go index b9775bcf6d2..1abc4f49e1a 100644 --- a/pkg/generated/openapi/zz_generated.openapi.go +++ b/pkg/generated/openapi/zz_generated.openapi.go @@ -1379,6 +1379,7 @@ func GetOpenAPIDefinitions(ref common.ReferenceCallback) map[string]common.OpenA kubecontrollermanagerconfigv1alpha1.DaemonSetControllerConfiguration{}.OpenAPIModelName(): schema_k8sio_kube_controller_manager_config_v1alpha1_DaemonSetControllerConfiguration(ref), kubecontrollermanagerconfigv1alpha1.DeploymentControllerConfiguration{}.OpenAPIModelName(): schema_k8sio_kube_controller_manager_config_v1alpha1_DeploymentControllerConfiguration(ref), kubecontrollermanagerconfigv1alpha1.DeprecatedControllerConfiguration{}.OpenAPIModelName(): schema_k8sio_kube_controller_manager_config_v1alpha1_DeprecatedControllerConfiguration(ref), + kubecontrollermanagerconfigv1alpha1.DeviceTaintEvictionControllerConfiguration{}.OpenAPIModelName(): schema_k8sio_kube_controller_manager_config_v1alpha1_DeviceTaintEvictionControllerConfiguration(ref), kubecontrollermanagerconfigv1alpha1.EndpointControllerConfiguration{}.OpenAPIModelName(): schema_k8sio_kube_controller_manager_config_v1alpha1_EndpointControllerConfiguration(ref), kubecontrollermanagerconfigv1alpha1.EndpointSliceControllerConfiguration{}.OpenAPIModelName(): 
schema_k8sio_kube_controller_manager_config_v1alpha1_EndpointSliceControllerConfiguration(ref), kubecontrollermanagerconfigv1alpha1.EndpointSliceMirroringControllerConfiguration{}.OpenAPIModelName(): schema_k8sio_kube_controller_manager_config_v1alpha1_EndpointSliceMirroringControllerConfiguration(ref), @@ -66751,6 +66752,28 @@ func schema_k8sio_kube_controller_manager_config_v1alpha1_DeprecatedControllerCo } } +func schema_k8sio_kube_controller_manager_config_v1alpha1_DeviceTaintEvictionControllerConfiguration(ref common.ReferenceCallback) common.OpenAPIDefinition { + return common.OpenAPIDefinition{ + Schema: spec.Schema{ + SchemaProps: spec.SchemaProps{ + Description: "DeviceTaintEvictionControllerConfiguration contains elements configuring the device taint eviction controller.", + Type: []string{"object"}, + Properties: map[string]spec.Schema{ + "ConcurrentSyncs": { + SchemaProps: spec.SchemaProps{ + Description: "ConcurrentSyncs is the number of operations (deleting a pod, updating a ResourcClaim status, etc.) that will be done concurrently. 
Larger number = processing, but more CPU (and network) load.\n\nThe default is 10.", + Default: 0, + Type: []string{"integer"}, + Format: "int32", + }, + }, + }, + Required: []string{"ConcurrentSyncs"}, + }, + }, + } +} + func schema_k8sio_kube_controller_manager_config_v1alpha1_EndpointControllerConfiguration(ref common.ReferenceCallback) common.OpenAPIDefinition { return common.OpenAPIDefinition{ Schema: spec.Schema{ @@ -67256,12 +67279,19 @@ func schema_k8sio_kube_controller_manager_config_v1alpha1_KubeControllerManagerC Ref: ref(kubecontrollermanagerconfigv1alpha1.ValidatingAdmissionPolicyStatusControllerConfiguration{}.OpenAPIModelName()), }, }, + "DeviceTaintEvictionController": { + SchemaProps: spec.SchemaProps{ + Description: "DeviceTaintEvictionControllerConfiguration contains elements configuring the device taint eviction controller.", + Default: map[string]interface{}{}, + Ref: ref(kubecontrollermanagerconfigv1alpha1.DeviceTaintEvictionControllerConfiguration{}.OpenAPIModelName()), + }, + }, }, - Required: []string{"Generic", "KubeCloudShared", "AttachDetachController", "CSRSigningController", "DaemonSetController", "DeploymentController", "StatefulSetController", "DeprecatedController", "EndpointController", "EndpointSliceController", "EndpointSliceMirroringController", "EphemeralVolumeController", "GarbageCollectorController", "HPAController", "JobController", "CronJobController", "LegacySATokenCleaner", "NamespaceController", "NodeIPAMController", "NodeLifecycleController", "PersistentVolumeBinderController", "PodGCController", "ReplicaSetController", "ReplicationController", "ResourceQuotaController", "SAController", "ServiceController", "TTLAfterFinishedController", "ValidatingAdmissionPolicyStatusController"}, + Required: []string{"Generic", "KubeCloudShared", "AttachDetachController", "CSRSigningController", "DaemonSetController", "DeploymentController", "StatefulSetController", "DeprecatedController", "EndpointController", 
"EndpointSliceController", "EndpointSliceMirroringController", "EphemeralVolumeController", "GarbageCollectorController", "HPAController", "JobController", "CronJobController", "LegacySATokenCleaner", "NamespaceController", "NodeIPAMController", "NodeLifecycleController", "PersistentVolumeBinderController", "PodGCController", "ReplicaSetController", "ReplicationController", "ResourceQuotaController", "SAController", "ServiceController", "TTLAfterFinishedController", "ValidatingAdmissionPolicyStatusController", "DeviceTaintEvictionController"}, }, }, Dependencies: []string{ - configv1alpha1.KubeCloudSharedConfiguration{}.OpenAPIModelName(), serviceconfigv1alpha1.ServiceControllerConfiguration{}.OpenAPIModelName(), controllermanagerconfigv1alpha1.GenericControllerManagerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.AttachDetachControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.CSRSigningControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.CronJobControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.DaemonSetControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.DeploymentControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.DeprecatedControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.EndpointControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.EndpointSliceControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.EndpointSliceMirroringControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.EphemeralVolumeControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.GarbageCollectorControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.HPAControllerConfiguration{}.OpenAPIModelName(), 
kubecontrollermanagerconfigv1alpha1.JobControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.LegacySATokenCleanerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.NamespaceControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.NodeIPAMControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.NodeLifecycleControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.PersistentVolumeBinderControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.PodGCControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.ReplicaSetControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.ReplicationControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.ResourceQuotaControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.SAControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.StatefulSetControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.TTLAfterFinishedControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.ValidatingAdmissionPolicyStatusControllerConfiguration{}.OpenAPIModelName()}, + configv1alpha1.KubeCloudSharedConfiguration{}.OpenAPIModelName(), serviceconfigv1alpha1.ServiceControllerConfiguration{}.OpenAPIModelName(), controllermanagerconfigv1alpha1.GenericControllerManagerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.AttachDetachControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.CSRSigningControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.CronJobControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.DaemonSetControllerConfiguration{}.OpenAPIModelName(), 
kubecontrollermanagerconfigv1alpha1.DeploymentControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.DeprecatedControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.DeviceTaintEvictionControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.EndpointControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.EndpointSliceControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.EndpointSliceMirroringControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.EphemeralVolumeControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.GarbageCollectorControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.HPAControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.JobControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.LegacySATokenCleanerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.NamespaceControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.NodeIPAMControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.NodeLifecycleControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.PersistentVolumeBinderControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.PodGCControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.ReplicaSetControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.ReplicationControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.ResourceQuotaControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.SAControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.StatefulSetControllerConfiguration{}.OpenAPIModelName(), 
kubecontrollermanagerconfigv1alpha1.TTLAfterFinishedControllerConfiguration{}.OpenAPIModelName(), kubecontrollermanagerconfigv1alpha1.ValidatingAdmissionPolicyStatusControllerConfiguration{}.OpenAPIModelName()}, } } diff --git a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go index bb2ad0ecd48..3de443940b7 100644 --- a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go +++ b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/types.go @@ -168,6 +168,8 @@ type KubeControllerManagerConfiguration struct { // ValidatingAdmissionPolicyStatusControllerConfiguration holds configuration for // ValidatingAdmissionPolicyStatusController related features. ValidatingAdmissionPolicyStatusController ValidatingAdmissionPolicyStatusControllerConfiguration + // DeviceTaintEvictionControllerConfiguration contains elements configuring the device taint eviction controller. + DeviceTaintEvictionController DeviceTaintEvictionControllerConfiguration } // AttachDetachControllerConfiguration contains elements describing AttachDetachController. @@ -488,3 +490,12 @@ type ValidatingAdmissionPolicyStatusControllerConfiguration struct { // The default value is 5. ConcurrentPolicySyncs int32 } + +// DeviceTaintEvictionControllerConfiguration contains elements configuring the device taint eviction controller. +type DeviceTaintEvictionControllerConfiguration struct { + // ConcurrentSyncs is the number of operations (deleting a pod, updating a ResourceClaim status, etc.) + // that will be done concurrently. Larger number = faster processing, but more CPU (and network) load. + // + // The default is 10. 
+ ConcurrentSyncs int32 +} diff --git a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.deepcopy.go b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.deepcopy.go index 0ac9b6690e6..9715eca85bf 100644 --- a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.deepcopy.go +++ b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.deepcopy.go @@ -143,6 +143,22 @@ func (in *DeprecatedControllerConfiguration) DeepCopy() *DeprecatedControllerCon return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *DeviceTaintEvictionControllerConfiguration) DeepCopyInto(out *DeviceTaintEvictionControllerConfiguration) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DeviceTaintEvictionControllerConfiguration. +func (in *DeviceTaintEvictionControllerConfiguration) DeepCopy() *DeviceTaintEvictionControllerConfiguration { + if in == nil { + return nil + } + out := new(DeviceTaintEvictionControllerConfiguration) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *EndpointControllerConfiguration) DeepCopyInto(out *EndpointControllerConfiguration) { *out = *in @@ -321,6 +337,7 @@ func (in *KubeControllerManagerConfiguration) DeepCopyInto(out *KubeControllerMa out.ServiceController = in.ServiceController out.TTLAfterFinishedController = in.TTLAfterFinishedController out.ValidatingAdmissionPolicyStatusController = in.ValidatingAdmissionPolicyStatusController + out.DeviceTaintEvictionController = in.DeviceTaintEvictionController return } diff --git a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.model_name.go b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.model_name.go index c6be80ef005..9a5598f39db 100644 --- a/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.model_name.go +++ b/staging/src/k8s.io/kube-controller-manager/config/v1alpha1/zz_generated.model_name.go @@ -56,6 +56,11 @@ func (in DeprecatedControllerConfiguration) OpenAPIModelName() string { return "io.k8s.kube-controller-manager.config.v1alpha1.DeprecatedControllerConfiguration" } +// OpenAPIModelName returns the OpenAPI model name for this type. +func (in DeviceTaintEvictionControllerConfiguration) OpenAPIModelName() string { + return "io.k8s.kube-controller-manager.config.v1alpha1.DeviceTaintEvictionControllerConfiguration" +} + // OpenAPIModelName returns the OpenAPI model name for this type. func (in EndpointControllerConfiguration) OpenAPIModelName() string { return "io.k8s.kube-controller-manager.config.v1alpha1.EndpointControllerConfiguration" From bbf8bc766e304c51ce5e26e20704b27ffb651017 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Wed, 29 Oct 2025 12:03:07 +0100 Subject: [PATCH 10/11] DRA device taints: DeviceTaintRule status To update the right statuses, the controller must collect more information about why a pod is being evicted. Updating the DeviceTaintRule statuses then is handled by the same work queue as evicting pods. 
Both operations already share the same client instance and thus QPS+server-side throttling, so they might as well share the same work queue. Deleting pods is not necessarily more important than informing users or vice-versa, so there is no strong argument for having different queues. While at it, the unit tests are switched to the same mock work queue that is used in staging/src/k8s.io/dynamic-resource-allocation/internal/workqueue. Because there is no time to add it properly to a staging repo, the implementation gets copied. --- .../device_taint_eviction.go | 703 +++++++-- .../device_taint_eviction_test.go | 1376 ++++++++++++++--- .../devicetainteviction/mockqueue_test.go | 257 +++ .../rbac/bootstrappolicy/controller_policy.go | 2 + .../internal/workqueue/mockqueue.go | 10 + 5 files changed, 1972 insertions(+), 376 deletions(-) create mode 100644 pkg/controller/devicetainteviction/mockqueue_test.go diff --git a/pkg/controller/devicetainteviction/device_taint_eviction.go b/pkg/controller/devicetainteviction/device_taint_eviction.go index e3cbe35736d..80b0c19c891 100644 --- a/pkg/controller/devicetainteviction/device_taint_eviction.go +++ b/pkg/controller/devicetainteviction/device_taint_eviction.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "iter" + "maps" "math" "slices" "strings" @@ -38,6 +39,8 @@ import ( "k8s.io/apimachinery/pkg/util/diff" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/sets" + metav1ac "k8s.io/client-go/applyconfigurations/meta/v1" + resourceac "k8s.io/client-go/applyconfigurations/resource/v1alpha3" coreinformers "k8s.io/client-go/informers/core/v1" resourceinformers "k8s.io/client-go/informers/resource/v1" resourcealphainformers "k8s.io/client-go/informers/resource/v1alpha3" @@ -57,6 +60,13 @@ import ( utilpod "k8s.io/kubernetes/pkg/util/pod" ) +const ( + // ruleStatusPeriod is the shortest time between DeviceTaintRule status + // updates while eviction is in progress. 
Once it is done, it no longer gets + // updated until in progress again. + ruleStatusPeriod = 10 * time.Second +) + // Controller listens to Taint changes of DRA devices and Toleration changes of ResourceClaims, // then deletes Pods which use ResourceClaims that don't tolerate a NoExecute taint. // Pods which have already reached a final state (aka terminated) don't need to be deleted. @@ -93,13 +103,8 @@ type Controller struct { metrics metrics.Metrics workqueue workqueue.TypedRateLimitingInterface[workItem] - // evictPod ensures that the pod gets evicted at the specified time. - // It doesn't block. - evictPod func(pod tainteviction.NamespacedObject, fireAt time.Time) - - // cancelEvict cancels eviction set up with evictPod earlier. - // Idempotent, returns false if there was nothing to cancel. - cancelEvict func(pod tainteviction.NamespacedObject) bool + evictPodHook func(pod tainteviction.NamespacedObject, eviction evictionAndReason) + cancelEvictHook func(pod tainteviction.NamespacedObject) bool // mutex protects the following shared data structures. mutex sync.Mutex @@ -108,7 +113,7 @@ type Controller struct { // // The entry for pod gets deleted when eviction is no longer necessary // and updated when the time changes. - deletePodAt map[tainteviction.NamespacedObject]time.Time + deletePodAt map[tainteviction.NamespacedObject]evictionAndReason // maybeDeletePodCount counts how often a worker checked a pod. // This is useful for unit testing, but probably not a good public metric. @@ -119,6 +124,20 @@ type Controller struct { // pools indexes all slices by driver and pool name. pools map[poolID]pool + + // taintRuleStats tracks information about work that was done for a specific DeviceTaintRule instance. + taintRuleStats map[types.UID]taintRuleStats + + // simulateRule is set only during simulation of a None effect. + // + // During such a simulation the corresponding rule from ruleLister + // has EffectNone and this one here has EffectNoExecute. 
+ simulateRule *resourcealpha.DeviceTaintRule +} + +type taintRuleStats struct { + // numEvictedPods is the number of pods evicted because of this rule since starting the controller. + numEvictedPods int64 } type poolID struct { @@ -194,19 +213,19 @@ func (p pool) getTaintedDevices() []taintedDevice { } // getDevice looks up one device by name. Out-dated slices are ignored. -func (p pool) getDevice(deviceName string) *resourceapi.Device { +func (p pool) getDevice(deviceName string) (*resourceapi.ResourceSlice, *resourceapi.Device) { for _, slice := range p.slices { if slice.Spec.Pool.Generation != p.maxGeneration { continue } for i := range slice.Spec.Devices { if slice.Spec.Devices[i].Name == deviceName { - return &slice.Spec.Devices[i] + return slice, &slice.Spec.Devices[i] } } } - return nil + return nil, nil } type taintedDevice struct { @@ -219,23 +238,134 @@ type taintedDevice struct { type allocatedClaim struct { *resourceapi.ResourceClaim - // evictionTime, if non-nil, is the time at which pods using this claim need to be evicted. + // eviction, if non-nil, is the time at which pods using this claim need to be evicted. // This is the smallest value of all such per-device values. // For each device, the value is calculated as `