From e8d0a0f24b6805c8613f0914eea63c4f6c6abf87 Mon Sep 17 00:00:00 2001 From: takonomura Date: Fri, 8 May 2026 17:18:00 +0900 Subject: [PATCH] DRA: fix AllocationModeAll with consumed counters When collecting all matching devices for AllocationModeAll, the allocator did not record the source pool on the candidate device. Devices with consumed counters use that pool when checking shared counter availability, so the missing pool caused kube-scheduler to panic. Set the pool on all-devices candidates in the stable, incubating, and experimental allocators, and add a shared regression test for AllocationModeAll with consumed counters. --- .../allocatortesting/allocator_testing.go | 37 +++++++++++++++++++ .../experimental/allocator_experimental.go | 1 + .../incubating/allocator_incubating.go | 1 + .../internal/stable/allocator_stable.go | 1 + 4 files changed, 40 insertions(+) diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/allocatortesting/allocator_testing.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/allocatortesting/allocator_testing.go index c368c28433a..b10509031e3 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/allocatortesting/allocator_testing.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/allocatortesting/allocator_testing.go @@ -1233,6 +1233,43 @@ func TestAllocator(t *testing.T, deviceAllocationResult(req0, driverA, pool1, device1, false), )}, }, + "all-devices-with-consumed-counters": { + features: Features{ + PartitionableDevices: true, + }, + claimsToAllocate: objects(claimWithRequests(claim0, nil, resourceapi.DeviceRequest{ + Name: req0, + Exactly: &resourceapi.ExactDeviceRequest{ + AllocationMode: resourceapi.DeviceAllocationModeAll, + DeviceClassName: classA, + }, + })), + classes: objects(class(classA, driverA)), + slices: unwrapResourceSlices( + sliceWithDevices(slice1, node1, resourcePool(pool1, 2), driverA, + device(device1, nil, 
nil).withDeviceCounterConsumption( + deviceCounterConsumption(counterSet1, + map[string]resource.Quantity{ + "memory": resource.MustParse("4Gi"), + }, + ), + ), + ), + sliceWithCounterSets(slice2, node1, resourcePool(pool1, 2), driverA, + counterSet(counterSet1, + map[string]resource.Quantity{ + "memory": resource.MustParse("8Gi"), + }, + ), + ), + ), + node: node(node1, region1), + + expectResults: []any{allocationResult( + localNodeSelector(node1), + deviceAllocationResult(req0, driverA, pool1, device1, false), + )}, + }, "all-devices-many": { claimsToAllocate: objects(claimWithRequests(claim0, nil, resourceapi.DeviceRequest{ Name: req0, diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/experimental/allocator_experimental.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/experimental/allocator_experimental.go index b34221154e1..4540e795095 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/experimental/allocator_experimental.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/experimental/allocator_experimental.go @@ -568,6 +568,7 @@ func (alloc *allocator) validateDeviceRequest(request requestAccessor, parentReq id: DeviceID{Driver: slice.Spec.Driver, Pool: slice.Spec.Pool.Name, Device: slice.Spec.Devices[deviceIndex].Name}, Device: &slice.Spec.Devices[deviceIndex], slice: slice, + pool: pool, } if alloc.features.ConsumableCapacity { // Next validate whether resource request over capacity diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/incubating/allocator_incubating.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/incubating/allocator_incubating.go index c5bcada8d33..0a9013320b0 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/incubating/allocator_incubating.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/incubating/allocator_incubating.go @@ 
-565,6 +565,7 @@ func (alloc *allocator) validateDeviceRequest(request requestAccessor, parentReq id: DeviceID{Driver: slice.Spec.Driver, Pool: slice.Spec.Pool.Name, Device: slice.Spec.Devices[deviceIndex].Name}, Device: &slice.Spec.Devices[deviceIndex], slice: slice, + pool: pool, } if alloc.features.ConsumableCapacity { // Next validate whether resource request over capacity diff --git a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/stable/allocator_stable.go b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/stable/allocator_stable.go index 0dd866dbde8..b59601af4e3 100644 --- a/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/stable/allocator_stable.go +++ b/staging/src/k8s.io/dynamic-resource-allocation/structured/internal/stable/allocator_stable.go @@ -471,6 +471,7 @@ func (alloc *allocator) validateDeviceRequest(request requestAccessor, parentReq id: DeviceID{Driver: slice.Spec.Driver, Pool: slice.Spec.Pool.Name, Device: slice.Spec.Devices[deviceIndex].Name}, Device: &slice.Spec.Devices[deviceIndex], slice: slice, + pool: pool, } requestData.allDevices = append(requestData.allDevices, device) }