mirror of
https://github.com/kubernetes/kubernetes.git
synced 2026-06-11 01:41:54 -04:00
DeviceTaintRule is off by default because the corresponding v1beta2 API group is off. When enabled, the potentially still disabled v1alpha3 API version was used instead of the new v1beta2, causing the scheduler to fail while setting up informers and then not scheduling pods.
4274 lines
148 KiB
Go
4274 lines
148 KiB
Go
/*
|
|
Copyright 2022 The Kubernetes Authors.
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package dynamicresources
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"math"
|
|
"slices"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
goruntime "runtime"
|
|
|
|
"github.com/google/go-cmp/cmp"
|
|
"github.com/google/go-cmp/cmp/cmpopts"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
v1 "k8s.io/api/core/v1"
|
|
resourceapi "k8s.io/api/resource/v1"
|
|
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
|
apiresource "k8s.io/apimachinery/pkg/api/resource"
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
apiruntime "k8s.io/apimachinery/pkg/runtime"
|
|
"k8s.io/apimachinery/pkg/types"
|
|
"k8s.io/apimachinery/pkg/util/version"
|
|
"k8s.io/apimachinery/pkg/util/wait"
|
|
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
|
"k8s.io/client-go/informers"
|
|
"k8s.io/client-go/kubernetes/fake"
|
|
cgotesting "k8s.io/client-go/testing"
|
|
"k8s.io/client-go/tools/cache"
|
|
"k8s.io/client-go/tools/events"
|
|
featuregatetesting "k8s.io/component-base/featuregate/testing"
|
|
compbasemetrics "k8s.io/component-base/metrics"
|
|
"k8s.io/component-base/metrics/testutil"
|
|
"k8s.io/dynamic-resource-allocation/deviceclass/extendedresourcecache"
|
|
resourceslicetracker "k8s.io/dynamic-resource-allocation/resourceslice/tracker"
|
|
"k8s.io/dynamic-resource-allocation/structured"
|
|
"k8s.io/dynamic-resource-allocation/structured/schedulerapi"
|
|
"k8s.io/klog/v2"
|
|
kubeschedulerconfigv1 "k8s.io/kube-scheduler/config/v1"
|
|
fwk "k8s.io/kube-scheduler/framework"
|
|
"k8s.io/kubernetes/pkg/features"
|
|
"k8s.io/kubernetes/pkg/scheduler/apis/config"
|
|
configv1 "k8s.io/kubernetes/pkg/scheduler/apis/config/v1"
|
|
"k8s.io/kubernetes/pkg/scheduler/framework"
|
|
"k8s.io/kubernetes/pkg/scheduler/framework/plugins/feature"
|
|
"k8s.io/kubernetes/pkg/scheduler/framework/runtime"
|
|
"k8s.io/kubernetes/pkg/scheduler/metrics"
|
|
st "k8s.io/kubernetes/pkg/scheduler/testing"
|
|
"k8s.io/kubernetes/pkg/scheduler/util/assumecache"
|
|
"k8s.io/kubernetes/test/utils/ktesting"
|
|
"k8s.io/utils/ptr"
|
|
)
|
|
|
|
func init() {
|
|
metrics.InitMetrics()
|
|
}
|
|
|
|
var (
|
|
podKind = v1.SchemeGroupVersion.WithKind("Pod")
|
|
|
|
nodeName = "worker"
|
|
node2Name = "worker-2"
|
|
node3Name = "worker-3"
|
|
driver = "some-driver"
|
|
driver2 = "some-driver-2"
|
|
sharedDeviceName = "shared-instance"
|
|
podName = "my-pod"
|
|
podUID = "1234"
|
|
resourceName = "my-resource"
|
|
resourceName2 = resourceName + "-2"
|
|
capacityName = resourceapi.QualifiedName("my-cap")
|
|
claimName = podName + "-" + resourceName
|
|
claimName2 = podName + "-" + resourceName2
|
|
className = "my-resource-class"
|
|
namespace = "default"
|
|
attrName = resourceapi.QualifiedName("healthy") // device attribute only available on non-default node
|
|
extendedResourceName = "example.com/gpu"
|
|
extendedResourceName2 = "example.com/gpu2"
|
|
implicitExtendedResourceName = "deviceclass.resource.kubernetes.io/my-resource-class"
|
|
|
|
deviceClass = &resourceapi.DeviceClass{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: className,
|
|
},
|
|
}
|
|
deviceClassWithExtendResourceName = &resourceapi.DeviceClass{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: className,
|
|
},
|
|
Spec: resourceapi.DeviceClassSpec{
|
|
ExtendedResourceName: &extendedResourceName,
|
|
},
|
|
}
|
|
deviceClassWithExtendResourceName2 = &resourceapi.DeviceClass{
|
|
ObjectMeta: metav1.ObjectMeta{
|
|
Name: className + "2",
|
|
},
|
|
Spec: resourceapi.DeviceClassSpec{
|
|
ExtendedResourceName: &extendedResourceName2,
|
|
},
|
|
}
|
|
podWithClaimName = st.MakePod().Name(podName).Namespace(namespace).
|
|
UID(podUID).
|
|
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
|
|
Obj()
|
|
podWithClaimTemplate = st.MakePod().Name(podName).Namespace(namespace).
|
|
UID(podUID).
|
|
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimTemplateName: &claimName}).
|
|
Obj()
|
|
podWithClaimTemplateInStatus = func() *v1.Pod {
|
|
pod := podWithClaimTemplate.DeepCopy()
|
|
pod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{
|
|
{
|
|
Name: pod.Spec.ResourceClaims[0].Name,
|
|
ResourceClaimName: &claimName,
|
|
},
|
|
}
|
|
return pod
|
|
}()
|
|
podWithTwoClaimTemplates = st.MakePod().Name(podName).Namespace(namespace).
|
|
UID(podUID).
|
|
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimTemplateName: &claimName}).
|
|
PodResourceClaims(v1.PodResourceClaim{Name: resourceName2, ResourceClaimTemplateName: &claimName}).
|
|
Obj()
|
|
podWithTwoClaimNames = st.MakePod().Name(podName).Namespace(namespace).
|
|
UID(podUID).
|
|
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
|
|
PodResourceClaims(v1.PodResourceClaim{Name: resourceName2, ResourceClaimName: &claimName2}).
|
|
Obj()
|
|
podWithExtendedResourceName = st.MakePod().Name(podName).Namespace(namespace).
|
|
UID(podUID).
|
|
Req(map[v1.ResourceName]string{
|
|
v1.ResourceName(extendedResourceName): "1",
|
|
}).
|
|
Obj()
|
|
podWithExtendedResourceName2 = st.MakePod().Name(podName).Namespace(namespace).
|
|
UID(podUID).
|
|
Req(map[v1.ResourceName]string{
|
|
v1.ResourceName(extendedResourceName): "1",
|
|
v1.ResourceName(extendedResourceName2): "1",
|
|
}).
|
|
Obj()
|
|
podWithImplicitExtendedResourceName = st.MakePod().Name(podName).Namespace(namespace).
|
|
UID(podUID).
|
|
Req(map[v1.ResourceName]string{
|
|
v1.ResourceName(implicitExtendedResourceName): "1",
|
|
v1.ResourceName(extendedResourceName): "2",
|
|
}).
|
|
Obj()
|
|
podWithImplicitExtendedResourceNameTwoContainers = st.MakePod().Name(podName).Namespace(namespace).
|
|
UID(podUID).
|
|
Req(map[v1.ResourceName]string{
|
|
v1.ResourceName(implicitExtendedResourceName): "1",
|
|
}).
|
|
Req(map[v1.ResourceName]string{
|
|
v1.ResourceName(extendedResourceName): "2",
|
|
}).
|
|
Obj()
|
|
|
|
// Node with "instance-1" device and no device attributes.
|
|
workerNode = &st.MakeNode().Name(nodeName).Label("kubernetes.io/hostname", nodeName).Node
|
|
workerNodeSlice = st.MakeResourceSlice(nodeName, driver).Device("instance-1").Obj()
|
|
largeWorkerNodeSlice = st.MakeResourceSlice(nodeName, driver).Device("instance-1").Device("instance-2").Device("instance-3").Device("instance-4").Obj()
|
|
|
|
// Node with same device, but now with a "healthy" boolean attribute.
|
|
workerNode2 = &st.MakeNode().Name(node2Name).Label("kubernetes.io/hostname", node2Name).Node
|
|
workerNode2Slice = st.MakeResourceSlice(node2Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj()
|
|
|
|
// Yet another node, same as the second one.
|
|
workerNode3 = &st.MakeNode().Name(node3Name).Label("kubernetes.io/hostname", node3Name).Node
|
|
workerNode3Slice = st.MakeResourceSlice(node3Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj()
|
|
|
|
workerNodeWithExtendedResource = &st.MakeNode().Name(nodeName).Label("kubernetes.io/hostname", nodeName).Capacity(map[v1.ResourceName]string{v1.ResourceName(extendedResourceName): "1"}).Node
|
|
workerNodeWithExtendedResourceZeroAllocatable = &st.MakeNode().Name(nodeName).Label("kubernetes.io/hostname", nodeName).Capacity(map[v1.ResourceName]string{v1.ResourceName(extendedResourceName): "0"}).Node
|
|
brokenSelector = resourceapi.DeviceSelector{
|
|
CEL: &resourceapi.CELDeviceSelector{
|
|
// Not set for workerNode.
|
|
Expression: fmt.Sprintf(`device.attributes["%s"].%s`, driver, attrName),
|
|
},
|
|
}
|
|
|
|
claim = st.MakeResourceClaim().
|
|
Name(claimName).
|
|
Namespace(namespace).
|
|
Request(className).
|
|
Obj()
|
|
largeClaim = st.MakeResourceClaim().
|
|
Name(claimName).
|
|
Namespace(namespace).
|
|
Request(className).
|
|
Request(className).
|
|
Request(className).
|
|
Request(className).
|
|
Request(className).
|
|
Obj()
|
|
claim2 = st.MakeResourceClaim().
|
|
Name(claimName2).
|
|
Namespace(namespace).
|
|
Request(className).
|
|
Obj()
|
|
claimWithPrioritzedList = st.MakeResourceClaim().
|
|
Name(claimName).
|
|
Namespace(namespace).
|
|
RequestWithPrioritizedList(
|
|
st.SubRequest("subreq-1", className, 1),
|
|
).
|
|
Obj()
|
|
claimWithPrioritizedListAndSelector = st.MakeResourceClaim().
|
|
Name(claimName).
|
|
Namespace(namespace).
|
|
RequestWithPrioritizedList(
|
|
st.SubRequestWithSelector("subreq-1", className, fmt.Sprintf(`device.attributes["%s"].%s`, driver, attrName)),
|
|
st.SubRequest("subreq-2", className, 1),
|
|
).
|
|
Obj()
|
|
claimWithMultiplePrioritizedListRequests = st.MakeResourceClaim().
|
|
Name(claimName).
|
|
Namespace(namespace).
|
|
RequestWithPrioritizedList(
|
|
st.SubRequest("subreq-1", className, 2),
|
|
st.SubRequest("subreq-2", className, 1),
|
|
).
|
|
RequestWithPrioritizedList(
|
|
st.SubRequest("subreq-1", className, 2),
|
|
st.SubRequest("subreq-2", className, 1),
|
|
).Obj()
|
|
claim2WithPrioritizedListAndMultipleSubrequests = st.MakeResourceClaim().
|
|
Name(claimName2).
|
|
Namespace(namespace).
|
|
RequestWithPrioritizedList(
|
|
st.SubRequest("subreq-1", className, 4),
|
|
st.SubRequest("subreq-2", className, 3),
|
|
st.SubRequest("subreq-3", className, 2),
|
|
st.SubRequest("subreq-4", className, 1),
|
|
).Obj()
|
|
|
|
pendingClaim = st.FromResourceClaim(claim).
|
|
OwnerReference(podName, podUID, podKind).
|
|
Obj()
|
|
pendingClaim2 = st.FromResourceClaim(claim2).
|
|
OwnerReference(podName, podUID, podKind).
|
|
Obj()
|
|
pendingClaimWithPrioritizedList = st.FromResourceClaim(claimWithPrioritzedList).
|
|
OwnerReference(podName, podUID, podKind).
|
|
Obj()
|
|
pendingClaimWithPrioritizedListAndSelector = st.FromResourceClaim(claimWithPrioritizedListAndSelector).
|
|
OwnerReference(podName, podUID, podKind).
|
|
Obj()
|
|
pendingClaim2WithPrioritizedListAndMultipleSubrequests = st.FromResourceClaim(claim2WithPrioritizedListAndMultipleSubrequests).
|
|
OwnerReference(podName, podUID, podKind).
|
|
Obj()
|
|
pendingClaimWithMultiplePrioritizedListRequests = st.FromResourceClaim(claimWithMultiplePrioritizedListRequests).
|
|
OwnerReference(podName, podUID, podKind).
|
|
Obj()
|
|
allocationResult = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "req-1",
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
allocationResultWithSharedDevice = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: sharedDeviceName,
|
|
Request: "req-1",
|
|
ShareID: ptr.To(types.UID("share-123")), // Shared device allocation
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
allocationResultWithConsumedCapacity = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: sharedDeviceName,
|
|
Request: "req-1",
|
|
ShareID: ptr.To(types.UID("share-123")), // Shared device allocation
|
|
ConsumedCapacity: map[resourceapi.QualifiedName]apiresource.Quantity{
|
|
capacityName: apiresource.MustParse("1"),
|
|
},
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
allocationResultWithConsumedCapacity2 = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: sharedDeviceName,
|
|
Request: "req-1",
|
|
ShareID: ptr.To(types.UID("share-456")), // Shared device allocation
|
|
ConsumedCapacity: map[resourceapi.QualifiedName]apiresource.Quantity{
|
|
capacityName: apiresource.MustParse("1"),
|
|
},
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
allocationResult2 = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver2,
|
|
Pool: nodeName,
|
|
Device: "instance-2",
|
|
Request: "req-2",
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
extendedResourceAllocationResult = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "container-0-request-0",
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
extendedResourceAllocationResult2 = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "container-0-request-1",
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
implicitExtendedResourceAllocationResult = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "container-0-request-0",
|
|
},
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-2",
|
|
Request: "container-0-request-1",
|
|
},
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-3",
|
|
Request: "container-0-request-1",
|
|
},
|
|
},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
implicitExtendedResourceAllocationResultTwoContainers = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "container-0-request-0",
|
|
},
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-2",
|
|
Request: "container-1-request-0",
|
|
},
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-3",
|
|
Request: "container-1-request-0",
|
|
},
|
|
},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
extendedResourceAllocationResultNode2 = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "container-0-request-0",
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{node2Name}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
|
|
allocationResultWithPrioritizedList = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "req-1/subreq-1",
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
allocationResultWithPrioritizedListAndSelector = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "req-1/subreq-1",
|
|
}},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
allocationResultWithPrioritizedListAndMultipleSubrequests = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "req-1/subreq-2",
|
|
},
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-2",
|
|
Request: "req-1/subreq-2",
|
|
},
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-3",
|
|
Request: "req-1/subreq-2",
|
|
},
|
|
},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
allocationResultWithMultiplePrioritizedListRequests = &resourceapi.AllocationResult{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "req-1/subreq-1",
|
|
},
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-2",
|
|
Request: "req-1/subreq-1",
|
|
},
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "req-2/subreq-1",
|
|
},
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-2",
|
|
Request: "req-2/subreq-1",
|
|
},
|
|
},
|
|
},
|
|
NodeSelector: func() *v1.NodeSelector {
|
|
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
}(),
|
|
}
|
|
inUseClaim = st.FromResourceClaim(pendingClaim).
|
|
Allocation(allocationResult).
|
|
ReservedForPod(podName, types.UID(podUID)).
|
|
Obj()
|
|
inUseClaimWithPrioritizedList = st.FromResourceClaim(pendingClaimWithPrioritizedList).
|
|
Allocation(allocationResultWithPrioritizedList).
|
|
ReservedForPod(podName, types.UID(podUID)).
|
|
Obj()
|
|
inUseClaimWithPrioritizedListAndSelector = st.FromResourceClaim(pendingClaimWithPrioritizedListAndSelector).
|
|
Allocation(allocationResultWithPrioritizedListAndSelector).
|
|
ReservedForPod(podName, types.UID(podUID)).
|
|
Obj()
|
|
inUseClaim2WithPrioritizedListAndMultipleSubrequests = st.FromResourceClaim(pendingClaim2WithPrioritizedListAndMultipleSubrequests).
|
|
Allocation(allocationResultWithPrioritizedListAndMultipleSubrequests).
|
|
ReservedForPod(podName, types.UID(podUID)).
|
|
Obj()
|
|
inUseClaimWithMultiplePrioritizedListRequests = st.FromResourceClaim(pendingClaimWithMultiplePrioritizedListRequests).
|
|
Allocation(allocationResultWithMultiplePrioritizedListRequests).
|
|
ReservedForPod(podName, types.UID(podUID)).
|
|
Obj()
|
|
allocatedClaim = st.FromResourceClaim(pendingClaim).
|
|
Allocation(allocationResult).
|
|
Obj()
|
|
allocatedClaim2 = st.FromResourceClaim(pendingClaim2).
|
|
Allocation(allocationResult2).
|
|
Obj()
|
|
allocatedClaimWithPrioritizedList = st.FromResourceClaim(pendingClaimWithPrioritizedList).
|
|
Allocation(allocationResultWithPrioritizedList).
|
|
Obj()
|
|
allocatedClaimWithPrioritizedListAndSelector = st.FromResourceClaim(pendingClaimWithPrioritizedListAndSelector).
|
|
Allocation(allocationResultWithPrioritizedListAndSelector).
|
|
Obj()
|
|
allocatedClaim2WithPrioritizedListAndMultipleSubrequests = st.FromResourceClaim(pendingClaim2WithPrioritizedListAndMultipleSubrequests).
|
|
Allocation(allocationResultWithPrioritizedListAndMultipleSubrequests).
|
|
Obj()
|
|
allocatedClaimWithMultiplePrioritizedListRequests = st.FromResourceClaim(pendingClaimWithMultiplePrioritizedListRequests).
|
|
Allocation(allocationResultWithMultiplePrioritizedListRequests).
|
|
Obj()
|
|
allocatedClaimWithWrongTopology = st.FromResourceClaim(allocatedClaim).
|
|
Allocation(&resourceapi.AllocationResult{NodeSelector: st.MakeNodeSelector().In("no-such-label", []string{"no-such-value"}, st.NodeSelectorTypeMatchExpressions).Obj()}).
|
|
Obj()
|
|
allocatedClaimWithGoodTopology = st.FromResourceClaim(allocatedClaim).
|
|
Allocation(&resourceapi.AllocationResult{NodeSelector: st.MakeNodeSelector().In("kubernetes.io/hostname", []string{nodeName}, st.NodeSelectorTypeMatchExpressions).Obj()}).
|
|
Obj()
|
|
allocatedClaimWithSharedDevice = st.FromResourceClaim(pendingClaim).
|
|
Allocation(allocationResultWithSharedDevice).
|
|
Obj()
|
|
allocatedClaimWithConsumedCapacity = st.FromResourceClaim(pendingClaim).
|
|
Allocation(allocationResultWithConsumedCapacity).
|
|
Obj()
|
|
allocatedClaimWithConsumedCapacity2 = st.FromResourceClaim(pendingClaim).
|
|
Allocation(allocationResultWithConsumedCapacity2).
|
|
Obj()
|
|
otherClaim = st.MakeResourceClaim().
|
|
Name("not-my-claim").
|
|
Namespace(namespace).
|
|
Request(className).
|
|
Obj()
|
|
otherAllocatedClaim = st.FromResourceClaim(otherClaim).
|
|
Allocation(allocationResult).
|
|
Obj()
|
|
otherAllocatedClaimOtherDevice = func() *resourceapi.ResourceClaim {
|
|
claim := otherAllocatedClaim.DeepCopy()
|
|
claim.Status.Allocation.Devices.Results[0].Device += "-other"
|
|
return claim
|
|
}()
|
|
extendedResourceClaim = st.MakeResourceClaim().
|
|
Name("my-pod-extended-resources-0").
|
|
GenerateName("my-pod-extended-resources-").
|
|
Namespace(namespace).
|
|
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
|
|
OwnerRef(
|
|
metav1.OwnerReference{
|
|
APIVersion: "v1",
|
|
Kind: "Pod",
|
|
Name: podName,
|
|
UID: types.UID(podUID),
|
|
Controller: ptr.To(true),
|
|
}).
|
|
RequestWithName("container-0-request-0", className).
|
|
Allocation(extendedResourceAllocationResult).
|
|
Obj()
|
|
extendedResourceClaim2 = st.MakeResourceClaim().
|
|
Name("my-pod-extended-resources-0").
|
|
GenerateName("my-pod-extended-resources-").
|
|
Namespace(namespace).
|
|
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
|
|
OwnerRef(
|
|
metav1.OwnerReference{
|
|
APIVersion: "v1",
|
|
Kind: "Pod",
|
|
Name: podName,
|
|
UID: types.UID(podUID),
|
|
Controller: ptr.To(true),
|
|
}).
|
|
RequestWithName("container-0-request-1", className+"2").
|
|
Allocation(extendedResourceAllocationResult2).
|
|
Obj()
|
|
extendedResourceClaimNoName = st.MakeResourceClaim().
|
|
Name(specialClaimInMemName).
|
|
GenerateName("my-pod-extended-resources-").
|
|
Namespace(namespace).
|
|
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
|
|
OwnerRef(
|
|
metav1.OwnerReference{
|
|
APIVersion: "v1",
|
|
Kind: "Pod",
|
|
Name: podName,
|
|
UID: types.UID(podUID),
|
|
Controller: ptr.To(true),
|
|
}).
|
|
RequestWithName("container-0-request-0", className).
|
|
Allocation(extendedResourceAllocationResult).
|
|
Obj()
|
|
extendedResourceClaimNoName2 = st.MakeResourceClaim().
|
|
Name(specialClaimInMemName).
|
|
GenerateName("my-pod-extended-resources-").
|
|
Namespace(namespace).
|
|
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
|
|
OwnerRef(
|
|
metav1.OwnerReference{
|
|
APIVersion: "v1",
|
|
Kind: "Pod",
|
|
Name: podName,
|
|
UID: types.UID(podUID),
|
|
Controller: ptr.To(true),
|
|
}).
|
|
RequestWithName("container-0-request-1", className+"2").
|
|
Allocation(extendedResourceAllocationResult2).
|
|
Obj()
|
|
implicitExtendedResourceClaim = st.MakeResourceClaim().
|
|
Name("my-pod-extended-resources-0").
|
|
GenerateName("my-pod-extended-resources-").
|
|
Namespace(namespace).
|
|
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
|
|
OwnerRef(
|
|
metav1.OwnerReference{
|
|
APIVersion: "v1",
|
|
Kind: "Pod",
|
|
Name: podName,
|
|
UID: types.UID(podUID),
|
|
Controller: ptr.To(true),
|
|
}).
|
|
RequestWithName("container-0-request-0", className).
|
|
RequestWithNameCount("container-0-request-1", className, 2).
|
|
Allocation(implicitExtendedResourceAllocationResult).
|
|
Obj()
|
|
implicitExtendedResourceClaimNoName = st.MakeResourceClaim().
|
|
Name(specialClaimInMemName).
|
|
GenerateName("my-pod-extended-resources-").
|
|
Namespace(namespace).
|
|
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
|
|
OwnerRef(
|
|
metav1.OwnerReference{
|
|
APIVersion: "v1",
|
|
Kind: "Pod",
|
|
Name: podName,
|
|
UID: types.UID(podUID),
|
|
Controller: ptr.To(true),
|
|
}).
|
|
RequestWithName("container-0-request-0", className).
|
|
RequestWithNameCount("container-0-request-1", className, 2).
|
|
Allocation(implicitExtendedResourceAllocationResult).
|
|
Obj()
|
|
implicitExtendedResourceClaimTwoContainers = st.MakeResourceClaim().
|
|
Name("my-pod-extended-resources-0").
|
|
GenerateName("my-pod-extended-resources-").
|
|
Namespace(namespace).
|
|
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
|
|
OwnerRef(
|
|
metav1.OwnerReference{
|
|
APIVersion: "v1",
|
|
Kind: "Pod",
|
|
Name: podName,
|
|
UID: types.UID(podUID),
|
|
Controller: ptr.To(true),
|
|
}).
|
|
RequestWithName("container-0-request-0", className).
|
|
RequestWithNameCount("container-1-request-0", className, 2).
|
|
Allocation(implicitExtendedResourceAllocationResultTwoContainers).
|
|
Obj()
|
|
implicitExtendedResourceClaimNoNameTwoContainers = st.MakeResourceClaim().
|
|
Name(specialClaimInMemName).
|
|
GenerateName("my-pod-extended-resources-").
|
|
Namespace(namespace).
|
|
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
|
|
OwnerRef(
|
|
metav1.OwnerReference{
|
|
APIVersion: "v1",
|
|
Kind: "Pod",
|
|
Name: podName,
|
|
UID: types.UID(podUID),
|
|
Controller: ptr.To(true),
|
|
}).
|
|
RequestWithName("container-0-request-0", className).
|
|
RequestWithNameCount("container-1-request-0", className, 2).
|
|
Allocation(implicitExtendedResourceAllocationResultTwoContainers).
|
|
Obj()
|
|
extendedResourceClaimNode2 = st.MakeResourceClaim().
|
|
Name("my-pod-extended-resources-0").
|
|
GenerateName("my-pod-extended-resources-").
|
|
Namespace(namespace).
|
|
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
|
|
OwnerRef(
|
|
metav1.OwnerReference{
|
|
APIVersion: "v1",
|
|
Kind: "Pod",
|
|
Name: podName,
|
|
UID: types.UID(podUID),
|
|
Controller: ptr.To(true),
|
|
}).
|
|
RequestWithName("container-0-request-0", className).
|
|
Allocation(extendedResourceAllocationResultNode2).
|
|
Obj()
|
|
|
|
deviceTaint = resourceapi.DeviceTaint{
|
|
Key: "taint-key",
|
|
Value: "taint-value",
|
|
Effect: resourceapi.DeviceTaintEffectNoSchedule,
|
|
}
|
|
|
|
// for DRA Device Binding Conditions
|
|
bindingConditions = []string{"condition"}
|
|
bindingFailureConditions = []string{"failed"}
|
|
|
|
fabricSlice = func() *resourceapi.ResourceSlice {
|
|
res := st.MakeResourceSlice(nodeName, driver).Device("instance-1").Obj()
|
|
res.Spec.Devices[0].BindsToNode = ptr.To(true)
|
|
res.Spec.Devices[0].BindingConditions = bindingConditions
|
|
res.Spec.Devices[0].BindingFailureConditions = bindingFailureConditions
|
|
res.Spec.NodeSelector = st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
return res
|
|
}()
|
|
|
|
fabricSlice2 = func() *resourceapi.ResourceSlice {
|
|
res := st.MakeResourceSlice(nodeName, driver2).Device("instance-2").Obj()
|
|
res.Spec.Devices[0].BindsToNode = ptr.To(true)
|
|
res.Spec.Devices[0].BindingConditions = bindingConditions
|
|
res.Spec.Devices[0].BindingFailureConditions = bindingFailureConditions
|
|
res.Spec.NodeSelector = st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
|
|
return res
|
|
}()
|
|
|
|
allocationResultWithBindingConditions = &resourceapi.AllocationResult{
|
|
AllocationTimestamp: new(metav1.Time), // Non-nil, actual value not checked.
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Request: "req-1",
|
|
BindingConditions: bindingConditions,
|
|
BindingFailureConditions: bindingFailureConditions,
|
|
}},
|
|
},
|
|
NodeSelector: st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj(),
|
|
}
|
|
|
|
allocationResultWithBindingConditions2 = &resourceapi.AllocationResult{
|
|
AllocationTimestamp: new(metav1.Time), // Non-nil, actual value not checked.
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{{
|
|
Driver: driver2,
|
|
Pool: nodeName,
|
|
Device: "instance-2",
|
|
Request: "req-2",
|
|
BindingConditions: bindingConditions,
|
|
BindingFailureConditions: bindingFailureConditions,
|
|
}},
|
|
},
|
|
NodeSelector: st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj(),
|
|
}
|
|
|
|
bindClaim = st.FromResourceClaim(allocatedClaim).
|
|
Allocation(allocationResultWithBindingConditions).
|
|
Obj()
|
|
|
|
boundClaim = st.FromResourceClaim(allocatedClaim).
|
|
Allocation(allocationResultWithBindingConditions).
|
|
AllocatedDeviceStatuses([]resourceapi.AllocatedDeviceStatus{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Conditions: []metav1.Condition{
|
|
{Type: "condition", Status: metav1.ConditionTrue},
|
|
{Type: "failed", Status: metav1.ConditionFalse},
|
|
},
|
|
},
|
|
}).
|
|
Obj()
|
|
|
|
boundClaim2 = st.FromResourceClaim(allocatedClaim2).
|
|
Allocation(allocationResultWithBindingConditions2).
|
|
AllocatedDeviceStatuses([]resourceapi.AllocatedDeviceStatus{
|
|
{
|
|
Driver: driver2,
|
|
Pool: nodeName,
|
|
Device: "instance-2",
|
|
Conditions: []metav1.Condition{
|
|
{Type: "condition", Status: metav1.ConditionTrue},
|
|
{Type: "failed", Status: metav1.ConditionFalse},
|
|
},
|
|
},
|
|
}).
|
|
Obj()
|
|
|
|
failedBindingClaim = st.FromResourceClaim(allocatedClaim).
|
|
Allocation(allocationResultWithBindingConditions).
|
|
AllocatedDeviceStatuses([]resourceapi.AllocatedDeviceStatus{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Conditions: []metav1.Condition{
|
|
{Type: "condition", Status: metav1.ConditionFalse},
|
|
{Type: "failed", Status: metav1.ConditionTrue},
|
|
},
|
|
},
|
|
}).
|
|
Obj()
|
|
|
|
failedBindingClaim2 = st.FromResourceClaim(allocatedClaim2).
|
|
Allocation(allocationResultWithBindingConditions2).
|
|
AllocatedDeviceStatuses([]resourceapi.AllocatedDeviceStatus{
|
|
{
|
|
Driver: driver2,
|
|
Pool: nodeName,
|
|
Device: "instance-2",
|
|
Conditions: []metav1.Condition{
|
|
{Type: "condition", Status: metav1.ConditionFalse},
|
|
{Type: "failed", Status: metav1.ConditionTrue},
|
|
},
|
|
},
|
|
}).
|
|
Obj()
|
|
)
|
|
|
|
func taintDevices(slice *resourceapi.ResourceSlice) *resourceapi.ResourceSlice {
|
|
slice = slice.DeepCopy()
|
|
for i := range slice.Spec.Devices {
|
|
slice.Spec.Devices[i].Taints = append(slice.Spec.Devices[i].Taints, deviceTaint)
|
|
}
|
|
return slice
|
|
}
|
|
|
|
func reserve(claim *resourceapi.ResourceClaim, pod *v1.Pod) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(claim).
|
|
ReservedForPod(pod.Name, types.UID(pod.UID)).
|
|
Obj()
|
|
}
|
|
|
|
// addAllocationTimestamp adds an AllocationTimestamp to a claim.
|
|
// Non-nil is all that matters for the go-cmp comparison.
|
|
// Test cases involving binding conditions must ensure that they
|
|
// have such a non-nil time stamp in their expected claims starting
|
|
// with PreBind because PreBind adds it when the feature is on.
|
|
func addAllocationTimestamp(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
claim = claim.DeepCopy()
|
|
claim.Status.Allocation.AllocationTimestamp = new(metav1.Time)
|
|
return claim
|
|
}
|
|
|
|
func adminAccess(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
claim = claim.DeepCopy()
|
|
for i := range claim.Spec.Devices.Requests {
|
|
claim.Spec.Devices.Requests[i].Exactly.AdminAccess = ptr.To(true)
|
|
}
|
|
if claim.Status.Allocation != nil {
|
|
for i := range claim.Status.Allocation.Devices.Results {
|
|
claim.Status.Allocation.Devices.Results[i].AdminAccess = ptr.To(true)
|
|
}
|
|
}
|
|
return claim
|
|
}
|
|
|
|
func breakCELInClaim(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
claim = claim.DeepCopy()
|
|
for i := range claim.Spec.Devices.Requests {
|
|
for e := range claim.Spec.Devices.Requests[i].Exactly.Selectors {
|
|
claim.Spec.Devices.Requests[i].Exactly.Selectors[e] = brokenSelector
|
|
}
|
|
if len(claim.Spec.Devices.Requests[i].Exactly.Selectors) == 0 {
|
|
claim.Spec.Devices.Requests[i].Exactly.Selectors = []resourceapi.DeviceSelector{brokenSelector}
|
|
}
|
|
}
|
|
return claim
|
|
}
|
|
|
|
func breakCELInClass(class *resourceapi.DeviceClass) *resourceapi.DeviceClass {
|
|
class = class.DeepCopy()
|
|
for i := range class.Spec.Selectors {
|
|
class.Spec.Selectors[i] = brokenSelector
|
|
}
|
|
if len(class.Spec.Selectors) == 0 {
|
|
class.Spec.Selectors = []resourceapi.DeviceSelector{brokenSelector}
|
|
}
|
|
|
|
return class
|
|
}
|
|
|
|
func updateDeviceClassName(claim *resourceapi.ResourceClaim, deviceClassName string) *resourceapi.ResourceClaim {
|
|
claim = claim.DeepCopy()
|
|
for i := range claim.Spec.Devices.Requests {
|
|
// If the firstAvailable list is empty we update the device class name
|
|
// on the base request.
|
|
if len(claim.Spec.Devices.Requests[i].FirstAvailable) == 0 {
|
|
claim.Spec.Devices.Requests[i].Exactly.DeviceClassName = deviceClassName
|
|
} else {
|
|
// If subrequests are specified, update the device class name on
|
|
// all of them.
|
|
for j := range claim.Spec.Devices.Requests[i].FirstAvailable {
|
|
claim.Spec.Devices.Requests[i].FirstAvailable[j].DeviceClassName = deviceClassName
|
|
}
|
|
}
|
|
}
|
|
return claim
|
|
}
|
|
|
|
func getDefaultDynamicResourcesArgs() *config.DynamicResourcesArgs {
|
|
v1dra := &kubeschedulerconfigv1.DynamicResourcesArgs{}
|
|
configv1.SetDefaults_DynamicResourcesArgs(v1dra)
|
|
dra := &config.DynamicResourcesArgs{}
|
|
_ = configv1.Convert_v1_DynamicResourcesArgs_To_config_DynamicResourcesArgs(v1dra, dra, nil)
|
|
return dra
|
|
}
|
|
|
|
// result defines the expected outcome of some operation. It covers
|
|
// operation's status and the state of the world (= objects).
|
|
type result struct {
|
|
status *fwk.Status
|
|
// changes contains a mapping of name to an update function for
|
|
// the corresponding object. These functions apply exactly the expected
|
|
// changes to a copy of the object as it existed before the operation.
|
|
changes change
|
|
|
|
// added contains objects created by the operation.
|
|
added []metav1.Object
|
|
|
|
// removed contains objects deleted by the operation.
|
|
removed []metav1.Object
|
|
|
|
// assumedClaim is the one claim which is expected to be assumed,
|
|
// nil if none.
|
|
assumedClaim *resourceapi.ResourceClaim
|
|
|
|
// inFlightClaims is a list of claims which are expected to be tracked as
|
|
// in flight, nil if none.
|
|
inFlightClaims []metav1.Object
|
|
}
|
|
|
|
// change contains functions for modifying objects of a certain type. These
|
|
// functions will get called for all objects of that type. If they needs to
|
|
// make changes only to a particular instance, then it must check the name.
|
|
type change struct {
|
|
claim func(*resourceapi.ResourceClaim) *resourceapi.ResourceClaim
|
|
}
|
|
type perNodeResult map[string]result
|
|
|
|
func (p perNodeResult) forNode(nodeName string) result {
|
|
if p == nil {
|
|
return result{}
|
|
}
|
|
return p[nodeName]
|
|
}
|
|
|
|
// perNodeScoreResult maps a node name to the score expected for that node.
type perNodeScoreResult map[string]int64

// forNode returns the score stored for the given node. A nil map or a node
// without an entry yields zero.
func (p perNodeScoreResult) forNode(nodeName string) int64 {
	// The lookup also works for a nil map: it simply finds nothing.
	if score, found := p[nodeName]; found {
		return score
	}
	return 0
}
|
|
|
|
type want struct {
|
|
preenqueue result
|
|
preFilterResult *fwk.PreFilterResult
|
|
prefilter result
|
|
filter perNodeResult
|
|
prescore result
|
|
scoreResult perNodeScoreResult
|
|
score perNodeResult
|
|
normalizeScoreResult fwk.NodeScoreList
|
|
normalizeScore result
|
|
reserve result
|
|
unreserve result
|
|
preBindPreFlightStatus *fwk.Status
|
|
prebind result
|
|
postbind result
|
|
postFilterResult *fwk.PostFilterResult
|
|
postfilter result
|
|
|
|
// unreserveAfterBindFailure, if set, triggers a call to Unreserve
|
|
// after PreBind, as if the actual Bind had failed.
|
|
unreserveAfterBindFailure *result
|
|
|
|
// unreserveBeforePreBind, if set, triggers a call to Unreserve
|
|
// before PreBind, as if the some other PreBind plugin had failed.
|
|
unreserveBeforePreBind *result
|
|
}
|
|
|
|
// prepare contains changes for objects in the API server.
|
|
// Those changes are applied before running the steps. This can
|
|
// be used to simulate concurrent changes by some other entities
|
|
// like a resource driver.
|
|
type prepare struct {
|
|
filter change
|
|
prescore change
|
|
reserve change
|
|
unreserve change
|
|
prebind change
|
|
postbind change
|
|
postfilter change
|
|
}
|
|
|
|
type testPluginCase struct {
|
|
// patchTestCase gets called right before the test case is tested.
|
|
// It can be used to update time stamps in those test cases
|
|
// which are sensitive to the current time.
|
|
patchTestCase func(tc *testPluginCase)
|
|
|
|
args *config.DynamicResourcesArgs
|
|
nodes []*v1.Node // default if unset is workerNode
|
|
pod *v1.Pod
|
|
claims []*resourceapi.ResourceClaim
|
|
classes []*resourceapi.DeviceClass
|
|
|
|
inFlightClaims []*resourceapi.ResourceClaim
|
|
|
|
// objs get stored directly in the fake client, without passing
|
|
// through reactors, in contrast to the types above.
|
|
objs []apiruntime.Object
|
|
|
|
prepare prepare
|
|
want want
|
|
|
|
// Invoke Filter with a canceled context.
|
|
cancelFilter bool
|
|
|
|
// disableDRAAdminAccess is set to true to test behavior with the DRAAdminAccess feature gate disabled (emulates v1.35).
|
|
disableDRAAdminAccess bool
|
|
// enableDRADeviceBindingConditions is set to true if the DRADeviceBindingConditions feature gate is enabled.
|
|
enableDRADeviceBindingConditions bool
|
|
// EnableDRAResourceClaimDeviceStatus is set to true if the DRAResourceClaimDeviceStatus feature gate is enabled.
|
|
enableDRAResourceClaimDeviceStatus bool
|
|
// Feature gates. False is chosen so that the uncommon case
|
|
// doesn't need to be set.
|
|
disableDRA bool
|
|
|
|
enableDRAExtendedResource bool
|
|
enableDRAPrioritizedList bool
|
|
enableDRADeviceTaints bool
|
|
disableDRASchedulerFilterTimeout bool
|
|
skipOnWindows string
|
|
failPatch bool
|
|
reactors []cgotesting.Reactor
|
|
metrics func(ktesting.TContext, compbasemetrics.Gatherer)
|
|
}
|
|
|
|
func TestPlugin(t *testing.T) {
|
|
testPlugin(ktesting.Init(t))
|
|
}
|
|
func testPlugin(tCtx ktesting.TContext) {
|
|
testcases := map[string]testPluginCase{
|
|
"empty": {
|
|
pod: st.MakePod().Name("foo").Namespace("default").Obj(),
|
|
want: want{
|
|
prefilter: result{
|
|
status: fwk.NewStatus(fwk.Skip),
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable),
|
|
},
|
|
preBindPreFlightStatus: fwk.NewStatus(fwk.Skip),
|
|
},
|
|
},
|
|
"empty-with-extended-resources-enabled": {
|
|
enableDRAExtendedResource: true,
|
|
pod: st.MakePod().Name("foo").Namespace("default").Obj(),
|
|
want: want{
|
|
prefilter: result{
|
|
status: fwk.NewStatus(fwk.Skip),
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable),
|
|
},
|
|
preBindPreFlightStatus: fwk.NewStatus(fwk.Skip),
|
|
},
|
|
},
|
|
"claim-reference": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{allocatedClaim, otherClaim},
|
|
want: want{
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Status.ReservedFor = inUseClaim.Status.ReservedFor
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"claim-template": {
|
|
pod: podWithClaimTemplateInStatus,
|
|
claims: []*resourceapi.ResourceClaim{allocatedClaim, otherClaim},
|
|
want: want{
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimTemplateInStatus),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Status.ReservedFor = inUseClaim.Status.ReservedFor
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"missing-claim": {
|
|
pod: podWithClaimTemplate, // status not set
|
|
claims: []*resourceapi.ResourceClaim{allocatedClaim, otherClaim},
|
|
want: want{
|
|
preenqueue: result{
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `pod "default/my-pod": ResourceClaim not created yet`),
|
|
},
|
|
},
|
|
},
|
|
"deleted-claim": {
|
|
pod: podWithClaimTemplateInStatus,
|
|
claims: func() []*resourceapi.ResourceClaim {
|
|
claim := allocatedClaim.DeepCopy()
|
|
claim.DeletionTimestamp = &metav1.Time{Time: time.Now()}
|
|
return []*resourceapi.ResourceClaim{claim}
|
|
}(),
|
|
want: want{
|
|
preenqueue: result{
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim "my-pod-my-resource" is being deleted`),
|
|
},
|
|
},
|
|
},
|
|
"wrong-claim": {
|
|
pod: podWithClaimTemplateInStatus,
|
|
claims: func() []*resourceapi.ResourceClaim {
|
|
claim := allocatedClaim.DeepCopy()
|
|
claim.OwnerReferences[0].UID += "123"
|
|
return []*resourceapi.ResourceClaim{claim}
|
|
}(),
|
|
want: want{
|
|
preenqueue: result{
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `ResourceClaim default/my-pod-my-resource was not created for pod default/my-pod (pod is not owner)`),
|
|
},
|
|
},
|
|
},
|
|
"no-resources": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
|
|
},
|
|
},
|
|
},
|
|
"with-resources": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
claim.Status = inUseClaim.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
"with-resources-has-finalizer": {
|
|
// As before, but the finalizer is already set. Could happen if
|
|
// the scheduler got interrupted.
|
|
pod: podWithClaimName,
|
|
claims: func() []*resourceapi.ResourceClaim {
|
|
claim := pendingClaim
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
return []*resourceapi.ResourceClaim{claim}
|
|
}(),
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Status = inUseClaim.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
"with-resources-finalizer-gets-removed": {
|
|
// As before, but the finalizer is already set. Then it gets
|
|
// removed before the scheduler reaches PreBind.
|
|
pod: podWithClaimName,
|
|
claims: func() []*resourceapi.ResourceClaim {
|
|
claim := pendingClaim
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
return []*resourceapi.ResourceClaim{claim}
|
|
}(),
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
prepare: prepare{
|
|
prebind: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
claim.Finalizers = nil
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
claim.Status = inUseClaim.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
"with-resources-finalizer-gets-added": {
|
|
// No finalizer initially, then it gets added before
|
|
// the scheduler reaches PreBind. Shouldn't happen?
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
prepare: prepare{
|
|
prebind: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Status = inUseClaim.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
"skip-bind": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
unreserveBeforePreBind: &result{},
|
|
},
|
|
},
|
|
"exhausted-resources-in-informer-cache": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim, otherAllocatedClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
|
|
},
|
|
},
|
|
},
|
|
"exhausted-resources-in-flight-claim": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim, otherClaim},
|
|
inFlightClaims: []*resourceapi.ResourceClaim{otherAllocatedClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
preenqueue: result{
|
|
inFlightClaims: []metav1.Object{otherAllocatedClaim},
|
|
},
|
|
prefilter: result{
|
|
inFlightClaims: []metav1.Object{otherAllocatedClaim},
|
|
},
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
inFlightClaims: []metav1.Object{otherAllocatedClaim},
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
inFlightClaims: []metav1.Object{otherAllocatedClaim},
|
|
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
|
|
},
|
|
},
|
|
},
|
|
"other-resources-in-flight-claim": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim, otherClaim},
|
|
inFlightClaims: []*resourceapi.ResourceClaim{otherAllocatedClaimOtherDevice},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
preenqueue: result{
|
|
inFlightClaims: []metav1.Object{otherAllocatedClaimOtherDevice},
|
|
},
|
|
prefilter: result{
|
|
inFlightClaims: []metav1.Object{otherAllocatedClaimOtherDevice},
|
|
},
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
inFlightClaims: []metav1.Object{otherAllocatedClaimOtherDevice},
|
|
},
|
|
},
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim, otherAllocatedClaimOtherDevice},
|
|
},
|
|
prebind: result{
|
|
inFlightClaims: []metav1.Object{otherAllocatedClaimOtherDevice},
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Status = inUseClaim.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim, otherAllocatedClaimOtherDevice},
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
|
|
// The two test cases for device tainting only need to cover
|
|
// whether the feature gate is passed through to the allocator
|
|
// correctly. The actual logic around device taints and allocation
|
|
// is in the allocator.
|
|
"tainted-device-disabled": {
|
|
enableDRADeviceTaints: false,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{taintDevices(workerNodeSlice)},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
claim.Status = inUseClaim.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
"tainted-device-enabled": {
|
|
enableDRADeviceTaints: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{taintDevices(workerNodeSlice)},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
|
|
},
|
|
},
|
|
},
|
|
|
|
"request-admin-access-with-DRAAdminAccess-featuregate": {
|
|
// When the DRAAdminAccess feature gate is enabled,
|
|
// because the pending claim asks for admin access,
|
|
// allocation succeeds despite resources being exhausted.
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{adminAccess(pendingClaim), otherAllocatedClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{adminAccess(allocatedClaim)},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(adminAccess(allocatedClaim), podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
claim.Status = adminAccess(inUseClaim).Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(adminAccess(allocatedClaim), podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
"request-admin-access-without-DRAAdminAccess-featuregate": {
|
|
// When the DRAAdminAccess feature gate is disabled,
|
|
// even though the pending claim requests admin access,
|
|
// the scheduler returns an unschedulable status.
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{adminAccess(pendingClaim), otherAllocatedClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `claim default/my-pod-my-resource, request req-1: admin access is requested, but the feature is disabled`),
|
|
},
|
|
},
|
|
},
|
|
disableDRAAdminAccess: true,
|
|
},
|
|
|
|
"structured-ignore-allocated-admin-access": {
|
|
// The allocated claim uses admin access, so a second claim may use
|
|
// the same device.
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim, adminAccess(otherAllocatedClaim)},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
claim.Status = inUseClaim.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
|
|
"claim-parameters-CEL-runtime-error": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{breakCELInClaim(pendingClaim)},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.AsStatus(errors.New(`claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: ` + string(attrName))),
|
|
},
|
|
},
|
|
},
|
|
},
|
|
|
|
"class-parameters-CEL-runtime-error": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{breakCELInClass(deviceClass)},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.AsStatus(errors.New(`class my-resource-class: selector #0: CEL runtime error: no such key: ` + string(attrName))),
|
|
},
|
|
},
|
|
},
|
|
},
|
|
|
|
// When pod scheduling encounters CEL runtime errors for some nodes, but not all,
|
|
// it should still not schedule the pod because there is something wrong with it.
|
|
// Scheduling it would make it harder to detect that there is a problem.
|
|
//
|
|
// This matches the "keeps pod pending because of CEL runtime errors" E2E test.
|
|
"CEL-runtime-error-for-one-of-two-nodes": {
|
|
nodes: []*v1.Node{workerNode, workerNode2},
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{breakCELInClaim(pendingClaim)},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice, workerNode2Slice},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.AsStatus(errors.New(`claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: ` + string(attrName))),
|
|
},
|
|
},
|
|
},
|
|
},
|
|
|
|
// When two nodes were found, PreScore gets called.
|
|
"CEL-runtime-error-for-one-of-three-nodes": {
|
|
nodes: []*v1.Node{workerNode, workerNode2, workerNode3},
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{breakCELInClaim(pendingClaim)},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice, workerNode2Slice, workerNode3Slice},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: `+string(attrName)),
|
|
},
|
|
},
|
|
prescore: result{
|
|
// This is the error found during Filter.
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `filter node worker: claim default/my-pod-my-resource: selector #0: CEL runtime error: no such key: healthy`),
|
|
},
|
|
},
|
|
},
|
|
|
|
"missing-class": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
want: want{
|
|
prefilter: result{
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, fmt.Sprintf("request req-1: device class %s does not exist", className)),
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `no new claims to deallocate`),
|
|
},
|
|
},
|
|
},
|
|
"wrong-topology": {
|
|
// PostFilter tries to get the pod schedulable by
|
|
// deallocating the claim.
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{allocatedClaimWithWrongTopology},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
// Claims get deallocated immediately.
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(in).
|
|
Allocation(nil).
|
|
Obj()
|
|
},
|
|
},
|
|
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
|
|
},
|
|
},
|
|
},
|
|
"good-topology": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{allocatedClaimWithGoodTopology},
|
|
want: want{
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaimWithGoodTopology, podWithClaimName),
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(in).
|
|
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
|
|
Obj()
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"bind-failure": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{allocatedClaimWithGoodTopology},
|
|
want: want{
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaimWithGoodTopology, podWithClaimName),
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(in).
|
|
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
|
|
Obj()
|
|
},
|
|
},
|
|
},
|
|
unreserveAfterBindFailure: &result{
|
|
assumedClaim: reserve(allocatedClaimWithGoodTopology, podWithClaimName),
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
out := in.DeepCopy()
|
|
out.Status.ReservedFor = []resourceapi.ResourceClaimConsumerReference{}
|
|
return out
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"reserved-okay": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{inUseClaim},
|
|
},
|
|
"DRA-disabled": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{inUseClaim},
|
|
want: want{
|
|
prefilter: result{
|
|
status: fwk.NewStatus(fwk.Skip),
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `plugin disabled`),
|
|
},
|
|
preBindPreFlightStatus: fwk.NewStatus(fwk.Skip),
|
|
},
|
|
disableDRA: true,
|
|
},
|
|
"claim-with-request-with-unknown-device-class": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{updateDeviceClassName(claim, "does-not-exist")},
|
|
want: want{
|
|
prefilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `request req-1: device class does-not-exist does not exist`),
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `no new claims to deallocate`),
|
|
},
|
|
},
|
|
},
|
|
"claim-with-prioritized-list-feature-disabled": {
|
|
enableDRAPrioritizedList: false,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{claimWithPrioritzedList},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
want: want{
|
|
prefilter: result{
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `claim default/my-pod-my-resource, request req-1: has subrequests, but the DRAPrioritizedList feature is disabled`),
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `no new claims to deallocate`),
|
|
},
|
|
},
|
|
},
|
|
"claim-with-prioritized-list-unknown-device-class": {
|
|
enableDRAPrioritizedList: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{updateDeviceClassName(claimWithPrioritzedList, "does-not-exist")},
|
|
want: want{
|
|
prefilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `request req-1/subreq-1: device class does-not-exist does not exist`),
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `no new claims to deallocate`),
|
|
},
|
|
},
|
|
},
|
|
"claim-with-prioritized-list": {
|
|
enableDRAPrioritizedList: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaimWithPrioritizedList},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaimWithPrioritizedList},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaimWithPrioritizedList, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = allocatedClaimWithPrioritizedList.Finalizers
|
|
claim.Status = inUseClaimWithPrioritizedList.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"extended-resource-name-with-node-resource": {
|
|
enableDRAExtendedResource: true,
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
nodes: []*v1.Node{workerNodeWithExtendedResource},
|
|
pod: podWithExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
want: want{},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.ErrorContains(tCtx, err, "not found")
|
|
},
|
|
},
|
|
"extended-resource-one-device-plugin-one-dra": {
|
|
enableDRAExtendedResource: true,
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
nodes: []*v1.Node{workerNodeWithExtendedResource},
|
|
pod: podWithExtendedResourceName2,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName, deviceClassWithExtendResourceName2},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName2},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{extendedResourceClaimNoName2},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: addAllocationTimestamp(reserve(extendedResourceClaim2, podWithExtendedResourceName2)),
|
|
added: []metav1.Object{addAllocationTimestamp(reserve(extendedResourceClaim2, podWithExtendedResourceName2))},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: addAllocationTimestamp(reserve(extendedResourceClaim2, podWithExtendedResourceName2)),
|
|
},
|
|
},
|
|
},
|
|
"extended-resource-name-with-zero-allocatable": {
|
|
enableDRAExtendedResource: true,
|
|
nodes: []*v1.Node{workerNodeWithExtendedResourceZeroAllocatable},
|
|
pod: podWithExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
|
|
added: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
|
|
},
|
|
},
|
|
},
|
|
"non-DRA-extended-resource-name-with-zero-allocatable": {
|
|
enableDRAExtendedResource: true,
|
|
nodes: []*v1.Node{workerNodeWithExtendedResourceZeroAllocatable},
|
|
pod: podWithExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
|
|
want: want{
|
|
prefilter: result{
|
|
status: fwk.NewStatus(fwk.Skip),
|
|
},
|
|
preBindPreFlightStatus: fwk.NewStatus(fwk.Skip),
|
|
},
|
|
},
|
|
"extended-resource-name-no-resource": {
|
|
enableDRAExtendedResource: true,
|
|
pod: podWithExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.ErrorContains(tCtx, err, "not found")
|
|
},
|
|
},
|
|
"extended-resource-name-with-resources": {
|
|
enableDRAExtendedResource: true,
|
|
pod: podWithExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
|
|
added: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.NoError(tCtx, err)
|
|
require.Equal(tCtx, 1, int(metric["success"]))
|
|
},
|
|
},
|
|
"implicit-extended-resource-name-with-resources": {
|
|
enableDRAExtendedResource: true,
|
|
pod: podWithImplicitExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{largeWorkerNodeSlice, podWithImplicitExtendedResourceName},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{implicitExtendedResourceClaimNoName},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(implicitExtendedResourceClaim, podWithImplicitExtendedResourceName),
|
|
added: []metav1.Object{reserve(implicitExtendedResourceClaim, podWithImplicitExtendedResourceName)},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(implicitExtendedResourceClaim, podWithImplicitExtendedResourceName),
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.NoError(tCtx, err)
|
|
require.Equal(tCtx, 1, int(metric["success"]))
|
|
},
|
|
},
|
|
"implicit-extended-resource-name-two-containers-with-resources": {
|
|
enableDRAExtendedResource: true,
|
|
pod: podWithImplicitExtendedResourceNameTwoContainers,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{largeWorkerNodeSlice, podWithImplicitExtendedResourceNameTwoContainers},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{implicitExtendedResourceClaimNoNameTwoContainers},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(implicitExtendedResourceClaimTwoContainers, podWithImplicitExtendedResourceNameTwoContainers),
|
|
added: []metav1.Object{reserve(implicitExtendedResourceClaimTwoContainers, podWithImplicitExtendedResourceNameTwoContainers)},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(implicitExtendedResourceClaimTwoContainers, podWithImplicitExtendedResourceNameTwoContainers),
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.NoError(tCtx, err)
|
|
require.Equal(tCtx, 1, int(metric["success"]))
|
|
},
|
|
},
|
|
"extended-resource-name-with-resources-fail-patch": {
|
|
enableDRAExtendedResource: true,
|
|
failPatch: true,
|
|
pod: podWithExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
|
|
added: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
|
|
status: fwk.NewStatus(fwk.Unschedulable, `patch error`),
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.NoError(tCtx, err)
|
|
require.Equal(tCtx, 1, int(metric["success"]))
|
|
},
|
|
},
|
|
"extended-resource-name-with-resources-has-claim": {
|
|
enableDRAExtendedResource: true,
|
|
pod: podWithExtendedResourceName,
|
|
claims: []*resourceapi.ResourceClaim{extendedResourceClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot schedule extended resource claim`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `deletion of ResourceClaim completed`),
|
|
removed: []metav1.Object{extendedResourceClaim},
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.ErrorContains(tCtx, err, "not found")
|
|
},
|
|
},
|
|
"extended-resource-name-with-resources-delete-claim": {
|
|
enableDRAExtendedResource: true,
|
|
pod: podWithExtendedResourceName,
|
|
claims: []*resourceapi.ResourceClaim{extendedResourceClaimNode2},
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot schedule extended resource claim`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `deletion of ResourceClaim completed`),
|
|
removed: []metav1.Object{extendedResourceClaimNode2},
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
_, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.ErrorContains(tCtx, err, "not found")
|
|
},
|
|
},
|
|
"extended-resource-name-bind-failure": {
|
|
enableDRAExtendedResource: true,
|
|
pod: podWithExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(extendedResourceClaim, podWithExtendedResourceName),
|
|
added: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
|
|
},
|
|
unreserveAfterBindFailure: &result{
|
|
removed: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.NoError(tCtx, err)
|
|
require.Equal(tCtx, 1, int(metric["success"]))
|
|
},
|
|
},
|
|
"extended-resource-name-skip-bind": {
|
|
enableDRAExtendedResource: true,
|
|
pod: podWithExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
|
|
},
|
|
unreserveBeforePreBind: &result{},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.NoError(tCtx, err)
|
|
require.Equal(tCtx, 1, int(metric["success"]))
|
|
},
|
|
},
|
|
"extended-resource-name-claim-creation-failure": {
|
|
enableDRAExtendedResource: true,
|
|
pod: podWithExtendedResourceName,
|
|
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName},
|
|
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{extendedResourceClaimNoName},
|
|
},
|
|
prebind: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `claim creation errors`),
|
|
},
|
|
unreserveAfterBindFailure: &result{
|
|
removed: []metav1.Object{reserve(extendedResourceClaim, podWithExtendedResourceName)},
|
|
},
|
|
},
|
|
reactors: []cgotesting.Reactor{
|
|
&cgotesting.SimpleReactor{
|
|
Verb: "create",
|
|
Resource: "resourceclaims",
|
|
Reaction: func(action cgotesting.Action) (handled bool, ret apiruntime.Object, err error) {
|
|
return true, nil, apierrors.NewBadRequest("claim creation errors")
|
|
},
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
metric, err := testutil.GetCounterValuesFromGatherer(g, "scheduler_resourceclaim_creates_total", map[string]string{}, "status")
|
|
require.NoError(tCtx, err)
|
|
require.Equal(tCtx, 1, int(metric["failure"]))
|
|
},
|
|
},
|
|
"canceled": {
|
|
cancelFilter: true,
|
|
args: &config.DynamicResourcesArgs{
|
|
FilterTimeout: &metav1.Duration{Duration: time.Nanosecond},
|
|
},
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{largeClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{largeWorkerNodeSlice},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `asked by caller to stop allocating devices: test canceling Filter`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
|
|
},
|
|
},
|
|
},
|
|
"timeout": {
|
|
args: &config.DynamicResourcesArgs{
|
|
FilterTimeout: &metav1.Duration{Duration: time.Nanosecond},
|
|
},
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{largeClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{largeWorkerNodeSlice},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `timed out trying to allocate devices`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
status: fwk.NewStatus(fwk.Unschedulable, `still not schedulable`),
|
|
},
|
|
},
|
|
// Skipping this test case on Windows as a 1ns timeout is not guaranteed to
|
|
// expire immediately on Windows due to its coarser timer granularity -
|
|
// typically in the range of 0.5 to 15.6 ms
|
|
skipOnWindows: "coarse timer granularity",
|
|
},
|
|
"timeout_disabled": {
|
|
// This variant uses the normal test objects to avoid excessive runtime.
|
|
// It could theoretically pass even though the 1 ns limit is enforced
|
|
// although it shouldn't be (which then would be a false positive),
|
|
// but that's unlikely.
|
|
disableDRASchedulerFilterTimeout: true,
|
|
args: &config.DynamicResourcesArgs{},
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
claim.Status = inUseClaim.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
"timeout_zero": {
|
|
args: &config.DynamicResourcesArgs{
|
|
FilterTimeout: &metav1.Duration{Duration: 0},
|
|
},
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = allocatedClaim.Finalizers
|
|
claim.Status = inUseClaim.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
postbind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
},
|
|
},
|
|
},
|
|
"dont-add-allocation-timestamp": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return reserve(allocatedClaim, podWithClaimName)
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"add-allocation-timestamp": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaim},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: addAllocationTimestamp(reserve(allocatedClaim, podWithClaimName)),
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return addAllocationTimestamp(reserve(allocatedClaim, podWithClaimName))
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"add-allocation-timestamp-failure": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{allocatedClaim},
|
|
prepare: prepare{
|
|
prebind: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
// Simulate deallocation before PreBind runs.
|
|
return st.FromResourceClaim(in).
|
|
Allocation(nil).
|
|
Obj()
|
|
},
|
|
},
|
|
},
|
|
want: want{
|
|
prebind: result{
|
|
status: fwk.AsStatus(fmt.Errorf("claim %s got deallocated elsewhere in the meantime", klog.KObj(allocatedClaim))),
|
|
},
|
|
},
|
|
},
|
|
"bind-claim-with-binding-conditions": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{fabricSlice},
|
|
args: &config.DynamicResourcesArgs{
|
|
// Time out quickly in PreBind. There's no controller which sets the
|
|
// binding conditions.
|
|
BindingTimeout: &metav1.Duration{Duration: time.Second},
|
|
},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{func() *resourceapi.ResourceClaim {
|
|
claim := bindClaim.DeepCopy()
|
|
// Will get set in PreBind.
|
|
claim.Status.Allocation.AllocationTimestamp = nil
|
|
return claim
|
|
}()},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(bindClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return reserve(bindClaim, podWithClaimName)
|
|
},
|
|
},
|
|
// From PreBind itself, when checking isPodReadyForBinding times out.
|
|
status: fwk.AsStatus(errors.New("device binding timeout")),
|
|
},
|
|
},
|
|
},
|
|
"bind-failure-concurrent-deallocation": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
objs: []apiruntime.Object{fabricSlice},
|
|
args: &config.DynamicResourcesArgs{
|
|
// Time out quickly in PreBind. There's no controller which sets the
|
|
// binding conditions.
|
|
BindingTimeout: &metav1.Duration{Duration: time.Second},
|
|
},
|
|
want: want{
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{func() *resourceapi.ResourceClaim {
|
|
claim := bindClaim.DeepCopy()
|
|
// Will get set in PreBind.
|
|
claim.Status.Allocation.AllocationTimestamp = nil
|
|
return claim
|
|
}()},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(bindClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return reserve(bindClaim, podWithClaimName)
|
|
},
|
|
},
|
|
// From PreBind itself, when checking isPodReadyForBinding times out.
|
|
status: fwk.AsStatus(errors.New("device binding timeout")),
|
|
},
|
|
},
|
|
},
|
|
"bound-claim-with-succeeded-binding-conditions": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{boundClaim},
|
|
want: want{
|
|
prebind: result{
|
|
assumedClaim: reserve(boundClaim, podWithClaimName),
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(in).
|
|
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
|
|
Obj()
|
|
},
|
|
},
|
|
status: nil,
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
// Counter: allocations_total should have exactly one event
|
|
allocs, err := testutil.GetCounterValuesFromGatherer(
|
|
g,
|
|
"scheduler_dra_bindingconditions_allocations_total",
|
|
map[string]string{
|
|
"status": "success",
|
|
},
|
|
"driver", // group by driver label
|
|
)
|
|
require.NoError(tCtx, err)
|
|
|
|
var totalAllocs float64
|
|
for _, v := range allocs {
|
|
totalAllocs += v
|
|
}
|
|
require.InEpsilon(tCtx, float64(1), totalAllocs, 0.1, "expected exactly one successful allocation with BindingConditions")
|
|
|
|
// Histogram: one success sample with requires_bindingconditions=true
|
|
hist, err := testutil.GetHistogramVecFromGatherer(
|
|
g,
|
|
"scheduler_dra_bindingconditions_wait_duration_seconds",
|
|
map[string]string{
|
|
"status": "success",
|
|
},
|
|
)
|
|
require.NoError(tCtx, err)
|
|
require.Equal(tCtx, uint64(1), hist.GetAggregatedSampleCount(), "expected one success sample in wait duration histogram")
|
|
},
|
|
},
|
|
"bound-claim-with-failed-binding": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{failedBindingClaim},
|
|
objs: []apiruntime.Object{workerNodeSlice},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(in).
|
|
Allocation(nil).
|
|
AllocatedDeviceStatuses(nil).
|
|
Obj()
|
|
},
|
|
},
|
|
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
|
|
},
|
|
},
|
|
},
|
|
"bound-claim-with-timed-out-binding": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithClaimName,
|
|
claims: func() []*resourceapi.ResourceClaim {
|
|
claim := allocatedClaim.DeepCopy()
|
|
claim.Status.Allocation = allocationResultWithBindingConditions.DeepCopy()
|
|
// This claim has binding conditions but is timed out.
|
|
claim.Status.Allocation.AllocationTimestamp = ptr.To(metav1.NewTime(time.Now().Add(-10 * time.Minute)))
|
|
claim.Status.Devices = []resourceapi.AllocatedDeviceStatus{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
},
|
|
}
|
|
return []*resourceapi.ResourceClaim{claim}
|
|
}(),
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(in).
|
|
Allocation(nil).
|
|
AllocatedDeviceStatuses(nil).
|
|
Obj()
|
|
},
|
|
},
|
|
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
|
|
},
|
|
},
|
|
},
|
|
"prebind-fail-with-binding-timeout": {
|
|
patchTestCase: func(tc *testPluginCase) {
|
|
// The time stamps must be injected into the test case right
|
|
// before it starts to get tested.
|
|
now := time.Now()
|
|
|
|
// Set the allocation time so that the claim is not timed out
|
|
// yet when the test starts, but then times out relatively quickly (the 10 seconds)
|
|
// when the test executes PreBind.
|
|
bindingTimeout := tc.args.BindingTimeout.Duration
|
|
timeoutAfter := 10 * time.Second
|
|
allocatedAt := now.Add(-bindingTimeout).Add(timeoutAfter)
|
|
|
|
claim := allocatedClaim.DeepCopy()
|
|
claim.Status.Allocation = allocationResultWithBindingConditions.DeepCopy()
|
|
// This claim has binding conditions but is not timed out.
|
|
claim.Status.Allocation.AllocationTimestamp = ptr.To(metav1.NewTime(allocatedAt))
|
|
claim.Status.Devices = []resourceapi.AllocatedDeviceStatus{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
},
|
|
}
|
|
tc.claims = []*resourceapi.ResourceClaim{claim}
|
|
|
|
claim = claim.DeepCopy()
|
|
claim.Status.Devices = []resourceapi.AllocatedDeviceStatus{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
},
|
|
}
|
|
tc.want.prebind.assumedClaim = reserve(claim, podWithClaimName)
|
|
},
|
|
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
args: &config.DynamicResourcesArgs{
|
|
BindingTimeout: &metav1.Duration{Duration: 600 * time.Second},
|
|
},
|
|
pod: podWithClaimName,
|
|
claims: nil, // Set in patchTestCase.
|
|
want: want{
|
|
prebind: result{
|
|
assumedClaim: nil, // Set in patchTestCase.
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(in).
|
|
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
|
|
Obj()
|
|
},
|
|
},
|
|
// From isPodReadyForBinding.
|
|
status: fwk.AsStatus(fmt.Errorf("%w: claim=%s", ErrDeviceBindingTimeout, claim.Name)),
|
|
},
|
|
},
|
|
metrics: func(tCtx ktesting.TContext, g compbasemetrics.Gatherer) {
|
|
// Counter: allocations_total with status "timeout" should have exactly one event
|
|
timeouts, err := testutil.GetCounterValuesFromGatherer(
|
|
g,
|
|
"scheduler_dra_bindingconditions_allocations_total",
|
|
map[string]string{
|
|
"status": "timeout",
|
|
},
|
|
"driver",
|
|
)
|
|
require.NoError(tCtx, err)
|
|
|
|
var totalTimeouts float64
|
|
for _, v := range timeouts {
|
|
totalTimeouts += v
|
|
}
|
|
require.InEpsilon(tCtx, float64(1), totalTimeouts, 0.1, "expected exactly one timeout with BindingConditions")
|
|
|
|
// Histogram: one timeout sample with requires_bindingconditions=true
|
|
hist, err := testutil.GetHistogramVecFromGatherer(
|
|
g,
|
|
"scheduler_dra_bindingconditions_wait_duration_seconds",
|
|
map[string]string{
|
|
"status": "timeout",
|
|
},
|
|
)
|
|
require.NoError(tCtx, err)
|
|
require.Equal(tCtx, uint64(1), hist.GetAggregatedSampleCount(), "expected one timeout sample in wait duration histogram")
|
|
},
|
|
},
|
|
"bound-claim-with-mixed-binding-conditions": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithClaimName,
|
|
claims: func() []*resourceapi.ResourceClaim {
|
|
claim := allocatedClaim.DeepCopy()
|
|
claim.Status.Allocation = allocationResultWithBindingConditions.DeepCopy()
|
|
// This claim has binding conditions but is timed out.
|
|
claim.Status.Allocation.AllocationTimestamp = ptr.To(metav1.NewTime(time.Now().Add(-10 * time.Minute)))
|
|
claim.Status.Devices = []resourceapi.AllocatedDeviceStatus{
|
|
{
|
|
Driver: driver,
|
|
Pool: nodeName,
|
|
Device: "instance-1",
|
|
Conditions: []metav1.Condition{
|
|
{Type: "condition1", Status: metav1.ConditionTrue},
|
|
{Type: "condition2", Status: metav1.ConditionFalse},
|
|
},
|
|
},
|
|
}
|
|
return []*resourceapi.ResourceClaim{claim}
|
|
}(),
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(in).
|
|
Allocation(nil).
|
|
AllocatedDeviceStatuses(nil).
|
|
Obj()
|
|
},
|
|
},
|
|
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
|
|
},
|
|
},
|
|
},
|
|
"bound-claim-without-binding-conditions": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
// This test ensures that when DRADeviceBindingConditions is enabled,
|
|
// but the claim has no binding conditions or binding failures,
|
|
// the plugin proceeds as if all conditions are satisfied.
|
|
pod: podWithClaimTemplateInStatus,
|
|
claims: []*resourceapi.ResourceClaim{allocatedClaim, otherClaim},
|
|
want: want{
|
|
prebind: result{
|
|
assumedClaim: addAllocationTimestamp(reserve(allocatedClaim, podWithClaimTemplateInStatus)),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Status.ReservedFor = inUseClaim.Status.ReservedFor
|
|
claim = addAllocationTimestamp(claim)
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
status: nil,
|
|
},
|
|
},
|
|
},
|
|
"multi-claims-binding-conditions-all-success": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithTwoClaimNames,
|
|
claims: []*resourceapi.ResourceClaim{boundClaim, boundClaim2},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
nodes: []*v1.Node{workerNode},
|
|
objs: []apiruntime.Object{fabricSlice, fabricSlice2},
|
|
want: want{
|
|
prebind: result{
|
|
assumedClaim: reserve(boundClaim, podWithTwoClaimNames),
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
return st.FromResourceClaim(in).
|
|
ReservedFor(resourceapi.ResourceClaimConsumerReference{Resource: "pods", Name: podName, UID: types.UID(podUID)}).
|
|
Obj()
|
|
},
|
|
},
|
|
status: nil,
|
|
},
|
|
},
|
|
},
|
|
"multi-claims-binding-conditions-one-fail": {
|
|
enableDRADeviceBindingConditions: true,
|
|
enableDRAResourceClaimDeviceStatus: true,
|
|
pod: podWithTwoClaimNames,
|
|
claims: []*resourceapi.ResourceClaim{boundClaim, failedBindingClaim2},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
nodes: []*v1.Node{workerNode},
|
|
objs: []apiruntime.Object{fabricSlice, fabricSlice2},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `resourceclaim not available on the node`),
|
|
},
|
|
},
|
|
postfilter: result{
|
|
changes: change{
|
|
claim: func(in *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if in.Name == claimName2 {
|
|
return st.FromResourceClaim(in).
|
|
Allocation(nil).
|
|
AllocatedDeviceStatuses(nil).
|
|
Obj()
|
|
} else {
|
|
return in
|
|
}
|
|
},
|
|
},
|
|
status: fwk.NewStatus(fwk.Unschedulable, `deallocation of ResourceClaim completed`),
|
|
},
|
|
},
|
|
},
|
|
"single-claim-prioritized-list-scoring": {
|
|
enableDRAPrioritizedList: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaimWithPrioritizedListAndSelector},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
nodes: []*v1.Node{workerNode, workerNode2},
|
|
objs: []apiruntime.Object{
|
|
st.MakeResourceSlice(nodeName, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(true)}}).Obj(),
|
|
st.MakeResourceSlice(node2Name, driver).Device("instance-1", map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{attrName: {BoolValue: ptr.To(false)}}).Obj(),
|
|
},
|
|
want: want{
|
|
scoreResult: perNodeScoreResult{
|
|
nodeName: 8,
|
|
node2Name: 7,
|
|
},
|
|
normalizeScoreResult: fwk.NodeScoreList{
|
|
{
|
|
Name: nodeName,
|
|
Score: 100,
|
|
},
|
|
{
|
|
Name: node2Name,
|
|
Score: 87,
|
|
},
|
|
},
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaimWithPrioritizedListAndSelector},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaimWithPrioritizedListAndSelector, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = allocatedClaimWithPrioritizedListAndSelector.Finalizers
|
|
claim.Status = inUseClaimWithPrioritizedListAndSelector.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"multiple-claims-prioritized-list-scoring": {
|
|
enableDRAPrioritizedList: true,
|
|
pod: podWithTwoClaimNames,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaimWithPrioritizedList, pendingClaim2WithPrioritizedListAndMultipleSubrequests},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
nodes: []*v1.Node{workerNode, workerNode2, workerNode3},
|
|
objs: []apiruntime.Object{
|
|
st.MakeResourceSlice(nodeName, driver).
|
|
Device("instance-1").
|
|
Device("instance-2").
|
|
Device("instance-3").
|
|
Device("instance-4").Obj(),
|
|
st.MakeResourceSlice(node2Name, driver).
|
|
Device("instance-1").
|
|
Device("instance-2").Obj(),
|
|
st.MakeResourceSlice(node3Name, driver).
|
|
Device("instance-1").Obj(),
|
|
},
|
|
want: want{
|
|
filter: perNodeResult{
|
|
workerNode3.Name: {
|
|
status: fwk.NewStatus(fwk.UnschedulableAndUnresolvable, `cannot allocate all claims`),
|
|
},
|
|
},
|
|
scoreResult: perNodeScoreResult{
|
|
workerNode.Name: 15,
|
|
workerNode2.Name: 13,
|
|
},
|
|
normalizeScoreResult: fwk.NodeScoreList{
|
|
{
|
|
Name: workerNode.Name,
|
|
Score: 100,
|
|
},
|
|
{
|
|
Name: workerNode2.Name,
|
|
Score: 86,
|
|
},
|
|
},
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaimWithPrioritizedList, allocatedClaim2WithPrioritizedListAndMultipleSubrequests},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaimWithPrioritizedList, podWithTwoClaimNames),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = inUseClaimWithPrioritizedList.Finalizers
|
|
claim.Status = inUseClaimWithPrioritizedList.Status
|
|
}
|
|
if claim.Name == claimName2 {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = inUseClaim2WithPrioritizedListAndMultipleSubrequests.Finalizers
|
|
claim.Status = inUseClaim2WithPrioritizedListAndMultipleSubrequests.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"multiple-requests-prioritized-list-scoring": {
|
|
enableDRAPrioritizedList: true,
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaimWithMultiplePrioritizedListRequests},
|
|
classes: []*resourceapi.DeviceClass{deviceClass},
|
|
nodes: []*v1.Node{workerNode, workerNode2, workerNode3},
|
|
objs: []apiruntime.Object{
|
|
st.MakeResourceSlice(nodeName, driver).
|
|
Device("instance-1").
|
|
Device("instance-2").
|
|
Device("instance-3").
|
|
Device("instance-4").Obj(),
|
|
st.MakeResourceSlice(node2Name, driver).
|
|
Device("instance-1").
|
|
Device("instance-2").
|
|
Device("instance-3").Obj(),
|
|
st.MakeResourceSlice(node3Name, driver).
|
|
Device("instance-1").
|
|
Device("instance-2").Obj(),
|
|
},
|
|
want: want{
|
|
scoreResult: perNodeScoreResult{
|
|
workerNode.Name: 16,
|
|
workerNode2.Name: 15,
|
|
workerNode3.Name: 14,
|
|
},
|
|
normalizeScoreResult: fwk.NodeScoreList{
|
|
{
|
|
Name: workerNode.Name,
|
|
Score: 100,
|
|
},
|
|
{
|
|
Name: workerNode2.Name,
|
|
Score: 93,
|
|
},
|
|
{
|
|
Name: workerNode3.Name,
|
|
Score: 87,
|
|
},
|
|
},
|
|
reserve: result{
|
|
inFlightClaims: []metav1.Object{allocatedClaimWithMultiplePrioritizedListRequests},
|
|
},
|
|
prebind: result{
|
|
assumedClaim: reserve(allocatedClaimWithMultiplePrioritizedListRequests, podWithClaimName),
|
|
changes: change{
|
|
claim: func(claim *resourceapi.ResourceClaim) *resourceapi.ResourceClaim {
|
|
if claim.Name == claimName {
|
|
claim = claim.DeepCopy()
|
|
claim.Finalizers = inUseClaimWithMultiplePrioritizedListRequests.Finalizers
|
|
claim.Status = inUseClaimWithMultiplePrioritizedListRequests.Status
|
|
}
|
|
return claim
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
for name, tc := range testcases {
|
|
if len(tc.skipOnWindows) > 0 && goruntime.GOOS == "windows" {
|
|
tCtx.Skipf("Skipping '%s' test case on Windows, reason: %s", name, tc.skipOnWindows)
|
|
}
|
|
tCtx.Run(name, func(tCtx ktesting.TContext) {
|
|
if tc.patchTestCase != nil {
|
|
tc.patchTestCase(&tc)
|
|
}
|
|
|
|
nodes := tc.nodes
|
|
if nodes == nil {
|
|
nodes = []*v1.Node{workerNode}
|
|
}
|
|
feats := feature.Features{
|
|
EnableDRAAdminAccess: !tc.disableDRAAdminAccess,
|
|
EnableDRADeviceBindingConditions: tc.enableDRADeviceBindingConditions,
|
|
EnableDRAResourceClaimDeviceStatus: tc.enableDRAResourceClaimDeviceStatus,
|
|
EnableDRADeviceTaints: tc.enableDRADeviceTaints,
|
|
EnableDRASchedulerFilterTimeout: !tc.disableDRASchedulerFilterTimeout,
|
|
EnableDynamicResourceAllocation: !tc.disableDRA,
|
|
EnableDRAPrioritizedList: tc.enableDRAPrioritizedList,
|
|
EnableDRAExtendedResource: tc.enableDRAExtendedResource,
|
|
}
|
|
|
|
if tc.disableDRAAdminAccess {
|
|
featuregatetesting.SetFeatureGateEmulationVersionDuringTest(tCtx, utilfeature.DefaultFeatureGate, version.MustParse("1.35"))
|
|
featuregatetesting.SetFeatureGateDuringTest(tCtx, utilfeature.DefaultFeatureGate, features.DRAAdminAccess, false)
|
|
}
|
|
featuregatetesting.SetFeatureGateDuringTest(tCtx, utilfeature.DefaultFeatureGate, features.DRAExtendedResource, tc.enableDRAExtendedResource)
|
|
testCtx := setup(tCtx, tc.args, nodes, tc.claims, tc.classes, tc.objs, feats, tc.failPatch, tc.reactors)
|
|
for _, claim := range tc.inFlightClaims {
|
|
tCtx.ExpectNoError(testCtx.draManager.ResourceClaims().SignalClaimPendingAllocation(claim.UID, claim))
|
|
}
|
|
|
|
initialObjects := testCtx.listAll(tCtx)
|
|
var registry compbasemetrics.KubeRegistry
|
|
if tc.metrics != nil {
|
|
registry = setupMetrics(feats)
|
|
}
|
|
|
|
status := testCtx.p.PreEnqueue(tCtx, tc.pod)
|
|
tCtx.Run("PreEnqueue", func(tCtx ktesting.TContext) {
|
|
testCtx.verify(tCtx, tc.want.preenqueue, initialObjects, nil, status)
|
|
})
|
|
if !status.IsSuccess() {
|
|
return
|
|
}
|
|
|
|
nodeInfo := framework.NewNodeInfo()
|
|
result, status := testCtx.p.PreFilter(tCtx, testCtx.state, tc.pod, []fwk.NodeInfo{nodeInfo})
|
|
tCtx.Run("prefilter", func(tCtx ktesting.TContext) {
|
|
assert.Equal(tCtx, tc.want.preFilterResult, result)
|
|
testCtx.verify(tCtx, tc.want.prefilter, initialObjects, result, status)
|
|
})
|
|
unschedulable := status.IsRejected()
|
|
|
|
var potentialNodes []fwk.NodeInfo
|
|
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.filter)
|
|
if !unschedulable {
|
|
for _, nodeInfo := range testCtx.nodeInfos {
|
|
var status *fwk.Status
|
|
tCtx.Run(fmt.Sprintf("filter/%s", nodeInfo.Node().Name), func(tCtx ktesting.TContext) {
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
ctx := context.Context(tCtx)
|
|
if tc.cancelFilter {
|
|
c, cancel := context.WithCancelCause(ctx)
|
|
ctx = c
|
|
cancel(errors.New("test canceling Filter"))
|
|
}
|
|
status = testCtx.p.Filter(ctx, testCtx.state, tc.pod, nodeInfo)
|
|
nodeName := nodeInfo.Node().Name
|
|
testCtx.verify(tCtx, tc.want.filter.forNode(nodeName), initialObjects, nil, status)
|
|
})
|
|
if status.Code() == fwk.Success {
|
|
potentialNodes = append(potentialNodes, nodeInfo)
|
|
}
|
|
if status.Code() == fwk.Error {
|
|
// An error aborts scheduling.
|
|
return
|
|
}
|
|
}
|
|
if len(potentialNodes) == 0 {
|
|
unschedulable = true
|
|
}
|
|
}
|
|
|
|
var scores fwk.NodeScoreList
|
|
if !unschedulable && len(potentialNodes) > 1 {
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.prescore)
|
|
|
|
for _, potentialNode := range potentialNodes {
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
score, status := testCtx.p.Score(tCtx, testCtx.state, tc.pod, potentialNode)
|
|
nodeName := potentialNode.Node().Name
|
|
tCtx.Run(fmt.Sprintf("score/%s", nodeName), func(tCtx ktesting.TContext) {
|
|
assert.Equal(tCtx, tc.want.scoreResult.forNode(nodeName), score)
|
|
testCtx.verify(tCtx, tc.want.score.forNode(nodeName), initialObjects, nil, status)
|
|
})
|
|
scores = append(scores, fwk.NodeScore{Name: nodeName, Score: score})
|
|
}
|
|
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
status := testCtx.p.NormalizeScore(tCtx, testCtx.state, tc.pod, scores)
|
|
tCtx.Run("normalizeScore", func(tCtx ktesting.TContext) {
|
|
assert.Equal(tCtx, tc.want.normalizeScoreResult, scores)
|
|
testCtx.verify(tCtx, tc.want.normalizeScore, initialObjects, nil, status)
|
|
})
|
|
}
|
|
|
|
var selectedNodeName string
|
|
if !unschedulable && len(potentialNodes) > 0 {
|
|
if len(scores) > 0 {
|
|
nodeScore := scores[0]
|
|
for _, score := range scores {
|
|
if score.Score > nodeScore.Score {
|
|
nodeScore = score
|
|
}
|
|
}
|
|
selectedNodeName = nodeScore.Name
|
|
} else {
|
|
selectedNodeName = potentialNodes[0].Node().Name
|
|
}
|
|
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.reserve)
|
|
status := testCtx.p.Reserve(tCtx, testCtx.state, tc.pod, selectedNodeName)
|
|
tCtx.Run("reserve", func(tCtx ktesting.TContext) {
|
|
testCtx.verify(tCtx, tc.want.reserve, initialObjects, nil, status)
|
|
})
|
|
if status.Code() != fwk.Success {
|
|
unschedulable = true
|
|
}
|
|
}
|
|
|
|
if selectedNodeName != "" {
|
|
if unschedulable {
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.unreserve)
|
|
testCtx.p.Unreserve(tCtx, testCtx.state, tc.pod, selectedNodeName)
|
|
tCtx.Run("unreserve", func(tCtx ktesting.TContext) {
|
|
testCtx.verify(tCtx, tc.want.unreserve, initialObjects, nil, status)
|
|
})
|
|
} else {
|
|
if tc.want.unreserveBeforePreBind != nil {
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
testCtx.p.Unreserve(tCtx, testCtx.state, tc.pod, selectedNodeName)
|
|
tCtx.Run("unreserveBeforePreBind", func(tCtx ktesting.TContext) {
|
|
testCtx.verify(tCtx, *tc.want.unreserveBeforePreBind, initialObjects, nil, status)
|
|
})
|
|
return
|
|
}
|
|
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.prebind)
|
|
preBindPreFlightResult, preBindPreFlightStatus := testCtx.p.PreBindPreFlight(tCtx, testCtx.state, tc.pod, selectedNodeName)
|
|
tCtx.Run("preBindPreFlightStatus", func(tContext ktesting.TContext) {
|
|
assert.Equal(tCtx, tc.want.preBindPreFlightStatus, preBindPreFlightStatus)
|
|
})
|
|
tCtx.Run("preBindPreFlightResult", func(tContext ktesting.TContext) {
|
|
assert.Equal(tCtx, &fwk.PreBindPreFlightResult{AllowParallel: true}, preBindPreFlightResult)
|
|
})
|
|
preBindStatus := testCtx.p.PreBind(tCtx, testCtx.state, tc.pod, selectedNodeName)
|
|
tCtx.Run("prebind", func(tCtx ktesting.TContext) {
|
|
testCtx.verify(tCtx, tc.want.prebind, initialObjects, nil, preBindStatus)
|
|
})
|
|
if tc.want.unreserveAfterBindFailure != nil {
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
testCtx.p.Unreserve(tCtx, testCtx.state, tc.pod, selectedNodeName)
|
|
tCtx.Run("unreserverAfterBindFailure", func(tCtx ktesting.TContext) {
|
|
testCtx.verify(tCtx, *tc.want.unreserveAfterBindFailure, initialObjects, nil, status)
|
|
})
|
|
} else if status.IsSuccess() {
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.postbind)
|
|
}
|
|
}
|
|
} else if len(potentialNodes) == 0 {
|
|
initialObjects = testCtx.listAll(tCtx)
|
|
initialObjects = testCtx.updateAPIServer(tCtx, initialObjects, tc.prepare.postfilter)
|
|
result, status := testCtx.p.PostFilter(tCtx, testCtx.state, tc.pod, nil /* filteredNodeStatusMap not used by plugin */)
|
|
tCtx.Run("postfilter", func(tCtx ktesting.TContext) {
|
|
assert.Equal(tCtx, tc.want.postFilterResult, result)
|
|
testCtx.verify(tCtx, tc.want.postfilter, initialObjects, nil, status)
|
|
})
|
|
}
|
|
if tc.metrics != nil {
|
|
tc.metrics(tCtx, registry)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func setupMetrics(features feature.Features) compbasemetrics.KubeRegistry {
|
|
// Since feature gate is not set globally, we can't use metrics.Register().
|
|
// We use a new registry instead of using global registry.
|
|
testRegistry := compbasemetrics.NewKubeRegistry()
|
|
if features.EnableDRAExtendedResource {
|
|
testRegistry.MustRegister(metrics.ResourceClaimCreatesTotal)
|
|
metrics.ResourceClaimCreatesTotal.Reset()
|
|
}
|
|
// DRA DeviceBindingConditions metrics.
|
|
if features.EnableDRADeviceBindingConditions {
|
|
testRegistry.MustRegister(metrics.DRABindingConditionsAllocationsTotal)
|
|
testRegistry.MustRegister(metrics.DRABindingConditionsPreBindDuration)
|
|
|
|
metrics.DRABindingConditionsAllocationsTotal.Reset()
|
|
metrics.DRABindingConditionsPreBindDuration.Reset()
|
|
}
|
|
return testRegistry
|
|
}
|
|
|
|
// testContext holds the objects which setup creates for one test case and
// which the helper methods below (verify, listAll, updateAPIServer, ...)
// operate on.
type testContext struct {
	// client is the fake clientset which plays the role of the API server.
	// setup installs the reactor from createReactor and the reactors of the
	// test case in it.
	client *fake.Clientset
	// informerFactory is the shared informer factory on top of client.
	// setup starts it and waits for its caches to sync.
	informerFactory informers.SharedInformerFactory
	// draManager is the DRA manager shared with the plugin. The helpers
	// inspect its assume cache and its in-flight allocations.
	draManager *DefaultDRAManager
	// p is the plugin instance under test.
	p *DynamicResources
	// nodeInfos contains one entry for each node passed to setup.
	nodeInfos []fwk.NodeInfo
	// state is the cycle state created by setup via framework.NewCycleState.
	state fwk.CycleState
}
|
|
|
|
func (tc *testContext) verify(tCtx ktesting.TContext, expected result, initialObjects []metav1.Object, result interface{}, status *fwk.Status) {
|
|
tCtx.Helper()
|
|
if expected.status == nil {
|
|
assert.Nil(tCtx, status)
|
|
} else if actualErr := status.AsError(); actualErr != nil {
|
|
// Compare only the error strings.
|
|
assert.ErrorContains(tCtx, actualErr, expected.status.AsError().Error())
|
|
} else {
|
|
assert.Equal(tCtx, expected.status, status)
|
|
}
|
|
objects := tc.listAll(tCtx)
|
|
wantObjects := update(initialObjects, expected.changes)
|
|
wantObjects = append(wantObjects, expected.added...)
|
|
for _, remove := range expected.removed {
|
|
for i, obj := range wantObjects {
|
|
// This is a bit relaxed (no GVR comparison, no UID
|
|
// comparison) to simplify writing the test cases.
|
|
if obj.GetName() == remove.GetName() && obj.GetNamespace() == remove.GetNamespace() {
|
|
wantObjects = append(wantObjects[0:i], wantObjects[i+1:]...)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
sortObjects(wantObjects)
|
|
if wantObjects == nil {
|
|
wantObjects = []metav1.Object{}
|
|
}
|
|
if objects == nil {
|
|
objects = []metav1.Object{}
|
|
}
|
|
|
|
// Sometimes assert strips the diff too much, let's do it ourselves...
|
|
ignoreFieldsInResourceClaims := []cmp.Option{
|
|
cmpopts.IgnoreFields(metav1.ObjectMeta{}, "UID", "ResourceVersion"),
|
|
cmp.Transformer("AllocationTimestamp", func(result resourceapi.AllocationResult) resourceapi.AllocationResult {
|
|
// Replace all allocation timestamps with the empty timestamp before comparison
|
|
// because the actual value is unpredictable (not running in a synctest bubble).
|
|
if result.AllocationTimestamp != nil {
|
|
result.AllocationTimestamp = new(metav1.Time)
|
|
}
|
|
return result
|
|
}),
|
|
// It does not matter which specific device is allocated for the testing purpose.
|
|
cmpopts.IgnoreFields(resourceapi.DeviceRequestAllocationResult{}, "Device"),
|
|
}
|
|
|
|
if diff := cmp.Diff(wantObjects, objects, ignoreFieldsInResourceClaims...); diff != "" {
|
|
tCtx.Errorf("Stored objects are different (- expected, + actual):\n%s", diff)
|
|
}
|
|
|
|
var expectAssumedClaims []metav1.Object
|
|
if expected.assumedClaim != nil {
|
|
expectAssumedClaims = append(expectAssumedClaims, expected.assumedClaim)
|
|
}
|
|
// actualAssumedClaims are claims in assumed cache with different latest and api object
|
|
// sameAssumedClaims are claims in assumed cache with same latest and api object
|
|
actualAssumedClaims, sameAssumedClaims := tc.listAssumedClaims()
|
|
|
|
// error when expecting no claims in assumed cache with different latest and api object
|
|
if len(expectAssumedClaims) == 0 && len(actualAssumedClaims) != 0 {
|
|
// In case we delete the claim API object, wait for assumed cache to sync with informer,
|
|
// then assumed cache should be empty.
|
|
err := wait.PollUntilContextTimeout(tCtx, 200*time.Millisecond, time.Minute, true,
|
|
func(ctx context.Context) (bool, error) {
|
|
actualAssumedClaims, sameAssumedClaims = tc.listAssumedClaims()
|
|
return len(actualAssumedClaims) == 0, nil
|
|
})
|
|
if err != nil || len(actualAssumedClaims) != 0 {
|
|
tCtx.Errorf("Assumed claims are different, err=%v, expected: nil, actual:\n%v", err, actualAssumedClaims)
|
|
}
|
|
}
|
|
if len(expectAssumedClaims) > 0 {
|
|
// it is not an error as long as the expected claim is present in the assumed cache, no
|
|
// matter its latest and api object are different or not.
|
|
for _, expected := range expectAssumedClaims {
|
|
seen := false
|
|
for _, actual := range actualAssumedClaims {
|
|
if cmp.Equal(expected, actual, ignoreFieldsInResourceClaims...) {
|
|
seen = true
|
|
}
|
|
}
|
|
for _, same := range sameAssumedClaims {
|
|
if cmp.Equal(expected, same, ignoreFieldsInResourceClaims...) {
|
|
seen = true
|
|
}
|
|
}
|
|
if !seen {
|
|
tCtx.Errorf("Assumed claims are different, expected: %v not found", expected)
|
|
}
|
|
}
|
|
}
|
|
|
|
actualInFlightClaims := tc.listInFlightClaims()
|
|
if diff := cmp.Diff(expected.inFlightClaims, actualInFlightClaims, ignoreFieldsInResourceClaims...); diff != "" {
|
|
tCtx.Errorf("In-flight claims are different (- expected, + actual):\n%s", diff)
|
|
}
|
|
}
|
|
|
|
func (tc *testContext) listAll(tCtx ktesting.TContext) (objects []metav1.Object) {
|
|
tCtx.Helper()
|
|
claims, err := tc.client.ResourceV1().ResourceClaims("").List(tCtx, metav1.ListOptions{})
|
|
tCtx.ExpectNoError(err, "list claims")
|
|
for _, claim := range claims.Items {
|
|
objects = append(objects, &claim)
|
|
}
|
|
sortObjects(objects)
|
|
return
|
|
}
|
|
|
|
func (tc *testContext) listAssumedClaims() ([]metav1.Object, []metav1.Object) {
|
|
var assumedClaims []metav1.Object
|
|
var sameClaims []metav1.Object
|
|
for _, obj := range tc.draManager.resourceClaimTracker.cache.List(nil) {
|
|
claim := obj.(*resourceapi.ResourceClaim)
|
|
obj, _ := tc.draManager.resourceClaimTracker.cache.Get(claim.Namespace + "/" + claim.Name)
|
|
apiObj, _ := tc.draManager.resourceClaimTracker.cache.GetAPIObj(claim.Namespace + "/" + claim.Name)
|
|
if obj != apiObj {
|
|
assumedClaims = append(assumedClaims, claim)
|
|
} else {
|
|
sameClaims = append(sameClaims, claim)
|
|
}
|
|
}
|
|
sortObjects(assumedClaims)
|
|
sortObjects(sameClaims)
|
|
return assumedClaims, sameClaims
|
|
}
|
|
|
|
func (tc *testContext) listInFlightClaims() []metav1.Object {
|
|
var inFlightClaims []metav1.Object
|
|
tc.draManager.resourceClaimTracker.inFlightAllocations.Range(func(key, value any) bool {
|
|
inFlightClaims = append(inFlightClaims, value.(*resourceapi.ResourceClaim))
|
|
return true
|
|
})
|
|
sortObjects(inFlightClaims)
|
|
return inFlightClaims
|
|
}
|
|
|
|
// updateAPIServer modifies objects and stores any changed object in the API server.
|
|
func (tc *testContext) updateAPIServer(tCtx ktesting.TContext, objects []metav1.Object, updates change) []metav1.Object {
|
|
modified := update(objects, updates)
|
|
for i := range modified {
|
|
obj := modified[i]
|
|
if diff := cmp.Diff(objects[i], obj); diff != "" {
|
|
tCtx.Logf("Updating %T %q, diff (-old, +new):\n%s", obj, obj.GetName(), diff)
|
|
switch obj := obj.(type) {
|
|
case *resourceapi.ResourceClaim:
|
|
obj, err := tc.client.ResourceV1().ResourceClaims(obj.Namespace).Update(tCtx, obj, metav1.UpdateOptions{})
|
|
tCtx.ExpectNoError(err, "prepare update")
|
|
modified[i] = obj
|
|
default:
|
|
tCtx.Fatalf("unsupported object type %T", obj)
|
|
}
|
|
}
|
|
}
|
|
return modified
|
|
}
|
|
|
|
func sortObjects(objects []metav1.Object) {
|
|
sort.Slice(objects, func(i, j int) bool {
|
|
if objects[i].GetNamespace() < objects[j].GetNamespace() {
|
|
return true
|
|
}
|
|
return objects[i].GetName() < objects[j].GetName()
|
|
})
|
|
}
|
|
|
|
// update walks through all existing objects, finds the corresponding update
|
|
// function based on name and kind, and replaces those objects that have an
|
|
// update function. The rest is left unchanged.
|
|
func update(objects []metav1.Object, updates change) []metav1.Object {
|
|
var updated []metav1.Object
|
|
|
|
for _, obj := range objects {
|
|
switch in := obj.(type) {
|
|
case *resourceapi.ResourceClaim:
|
|
if updates.claim != nil {
|
|
obj = updates.claim(in)
|
|
}
|
|
}
|
|
updated = append(updated, obj)
|
|
}
|
|
|
|
return updated
|
|
}
|
|
|
|
func setup(tCtx ktesting.TContext, args *config.DynamicResourcesArgs, nodes []*v1.Node, claims []*resourceapi.ResourceClaim, classes []*resourceapi.DeviceClass, objs []apiruntime.Object, features feature.Features, failPatch bool, apiReactors []cgotesting.Reactor) (result *testContext) {
|
|
tCtx.Helper()
|
|
|
|
tc := &testContext{}
|
|
|
|
tc.client = fake.NewSimpleClientset(objs...)
|
|
reactor := createReactor(tc.client.Tracker(), failPatch)
|
|
tc.client.PrependReactor("*", "*", reactor)
|
|
// Prepends reactors to the client.
|
|
tc.client.ReactionChain = append(apiReactors, tc.client.ReactionChain...)
|
|
|
|
tc.informerFactory = informers.NewSharedInformerFactory(tc.client, 0)
|
|
var doneCheckers []cache.DoneChecker
|
|
resourceSliceTrackerOpts := resourceslicetracker.Options{
|
|
EnableDeviceTaintRules: true,
|
|
SliceInformer: tc.informerFactory.Resource().V1().ResourceSlices(),
|
|
TaintInformer: tc.informerFactory.Resource().V1beta2().DeviceTaintRules(),
|
|
ClassInformer: tc.informerFactory.Resource().V1().DeviceClasses(),
|
|
KubeClient: tc.client,
|
|
}
|
|
resourceSliceTracker, err := resourceslicetracker.StartTracker(tCtx, resourceSliceTrackerOpts)
|
|
require.NoError(tCtx, err, "couldn't start resource slice tracker")
|
|
doneCheckers = append(doneCheckers, resourceSliceTracker.HasSyncedChecker())
|
|
|
|
claimsCache := assumecache.NewAssumeCache(tCtx.Logger(), tc.informerFactory.Resource().V1().ResourceClaims().Informer(), "resource claim", "", nil)
|
|
// NewAssumeCache calls the informer's AddEventHandler method to register
|
|
// a handler in order to stay in sync with the informer's store, but
|
|
// NewAssumeCache does not return the ResourceEventHandlerRegistration.
|
|
// We call AddEventHandler of the assume cache, passing it a noop
|
|
// ResourceEventHandler in order to get access to the
|
|
// ResourceEventHandlerRegistration returned by the informer.
|
|
//
|
|
// This is not the registered handler that is used by the DRA
|
|
// manager, but it is close enough because the assume cache
|
|
// uses a single boolean for "is synced" for all handlers.
|
|
doneCheckers = append(doneCheckers, claimsCache.AddEventHandler(cache.ResourceEventHandlerFuncs{}).HasSyncedChecker())
|
|
|
|
tc.draManager = NewDRAManager(tCtx, claimsCache, resourceSliceTracker, tc.informerFactory)
|
|
if features.EnableDRAExtendedResource {
|
|
cache := tc.draManager.DeviceClassResolver().(*extendedresourcecache.ExtendedResourceCache)
|
|
deviceClassHandlerRegistration, err := tc.informerFactory.Resource().V1().DeviceClasses().Informer().AddEventHandler(cache)
|
|
require.NoError(tCtx, err, "failed to add device class informer event handler")
|
|
doneCheckers = append(doneCheckers, deviceClassHandlerRegistration.HasSyncedChecker())
|
|
}
|
|
|
|
opts := []runtime.Option{
|
|
runtime.WithClientSet(tc.client),
|
|
runtime.WithInformerFactory(tc.informerFactory),
|
|
runtime.WithEventRecorder(&events.FakeRecorder{}),
|
|
runtime.WithSharedDRAManager(tc.draManager),
|
|
}
|
|
fh, err := runtime.NewFramework(tCtx, nil, nil, opts...)
|
|
tCtx.ExpectNoError(err, "create scheduler framework")
|
|
tCtx.Cleanup(func() {
|
|
tCtx.Cancel("test has completed")
|
|
runtime.WaitForShutdown(fh)
|
|
})
|
|
|
|
if args == nil {
|
|
args = getDefaultDynamicResourcesArgs()
|
|
}
|
|
pl, err := New(tCtx, args, fh, features)
|
|
tCtx.ExpectNoError(err, "create plugin")
|
|
tc.p = pl.(*DynamicResources)
|
|
|
|
// The tests use the API to create the objects because then reactors
|
|
// get triggered.
|
|
for _, claim := range claims {
|
|
_, err := tc.client.ResourceV1().ResourceClaims(claim.Namespace).Create(tCtx, claim, metav1.CreateOptions{})
|
|
tCtx.ExpectNoError(err, "create resource claim")
|
|
}
|
|
for _, class := range classes {
|
|
_, err := tc.client.ResourceV1().DeviceClasses().Create(tCtx, class, metav1.CreateOptions{})
|
|
tCtx.ExpectNoError(err, "create resource class")
|
|
}
|
|
|
|
tc.informerFactory.Start(tCtx.Done())
|
|
tCtx.Cleanup(func() {
|
|
// Need to cancel before waiting for the shutdown.
|
|
tCtx.Cancel("test is done")
|
|
// Now we can wait for all goroutines to stop.
|
|
tc.informerFactory.Shutdown()
|
|
})
|
|
|
|
tc.informerFactory.WaitForCacheSync(tCtx.Done())
|
|
// The above does not tell us if the registered handlers (e.g. from NewAssumeCache)
|
|
// are synced, we need to wait until the event handlers confirm that they are synced.
|
|
// This ensures that the assume cache is in sync with the informer's
|
|
// store which has been informed by at least one full LIST of the underlying storage.
|
|
cache.WaitFor(tCtx, "event handlers", doneCheckers...)
|
|
|
|
for _, node := range nodes {
|
|
nodeInfo := framework.NewNodeInfo()
|
|
nodeInfo.SetNode(node)
|
|
tc.nodeInfos = append(tc.nodeInfos, nodeInfo)
|
|
}
|
|
tc.state = framework.NewCycleState()
|
|
|
|
return tc
|
|
}
|
|
|
|
// createReactor implements the logic required for the UID and ResourceVersion
|
|
// fields to work when using the fake client. Add it with client.PrependReactor
|
|
// to your fake client. ResourceVersion handling is required for conflict
|
|
// detection during updates, which is covered by some scenarios.
|
|
func createReactor(tracker cgotesting.ObjectTracker, failPatch bool) func(action cgotesting.Action) (handled bool, ret apiruntime.Object, err error) {
|
|
var nameCounter int
|
|
var uidCounter int
|
|
var resourceVersionCounter int
|
|
var mutex sync.Mutex
|
|
|
|
return func(action cgotesting.Action) (handled bool, ret apiruntime.Object, err error) {
|
|
if failPatch {
|
|
if _, ok := action.(cgotesting.PatchAction); ok {
|
|
return true, nil, errors.New("patch error")
|
|
}
|
|
}
|
|
|
|
createAction, ok := action.(cgotesting.CreateAction)
|
|
if !ok {
|
|
return false, nil, nil
|
|
}
|
|
obj, ok := createAction.GetObject().(metav1.Object)
|
|
if !ok {
|
|
return false, nil, nil
|
|
}
|
|
|
|
mutex.Lock()
|
|
defer mutex.Unlock()
|
|
switch action.GetVerb() {
|
|
case "create":
|
|
if obj.GetUID() != "" {
|
|
return true, nil, errors.New("UID must not be set on create")
|
|
}
|
|
if obj.GetResourceVersion() != "" {
|
|
return true, nil, errors.New("ResourceVersion must not be set on create")
|
|
}
|
|
obj.SetUID(types.UID(fmt.Sprintf("UID-%d", uidCounter)))
|
|
uidCounter++
|
|
obj.SetResourceVersion(fmt.Sprintf("%d", resourceVersionCounter))
|
|
resourceVersionCounter++
|
|
if obj.GetName() == "" {
|
|
obj.SetName(obj.GetGenerateName() + fmt.Sprintf("%d", nameCounter))
|
|
nameCounter++
|
|
}
|
|
case "update":
|
|
uid := obj.GetUID()
|
|
resourceVersion := obj.GetResourceVersion()
|
|
if uid == "" {
|
|
return true, nil, errors.New("UID must be set on update")
|
|
}
|
|
if resourceVersion == "" {
|
|
return true, nil, errors.New("ResourceVersion must be set on update")
|
|
}
|
|
|
|
oldObj, err := tracker.Get(action.GetResource(), obj.GetNamespace(), obj.GetName())
|
|
if err != nil {
|
|
return true, nil, err
|
|
}
|
|
oldObjMeta, ok := oldObj.(metav1.Object)
|
|
if !ok {
|
|
return true, nil, errors.New("internal error: unexpected old object type")
|
|
}
|
|
if oldObjMeta.GetResourceVersion() != resourceVersion {
|
|
return true, nil, apierrors.NewConflict(action.GetResource().GroupResource(), obj.GetName(), errors.New("ResourceVersion must match the object that gets updated"))
|
|
}
|
|
|
|
obj.SetResourceVersion(fmt.Sprintf("%d", resourceVersionCounter))
|
|
resourceVersionCounter++
|
|
}
|
|
return false, nil, nil
|
|
}
|
|
}
|
|
|
|
func TestIsSchedulableAfterClaimChange(t *testing.T) {
|
|
testIsSchedulableAfterClaimChange(ktesting.Init(t))
|
|
}
|
|
func testIsSchedulableAfterClaimChange(tCtx ktesting.TContext) {
|
|
testcases := map[string]struct {
|
|
pod *v1.Pod
|
|
claims []*resourceapi.ResourceClaim
|
|
oldObj, newObj interface{}
|
|
wantHint fwk.QueueingHint
|
|
wantErr bool
|
|
}{
|
|
"skip-deletes": {
|
|
pod: podWithClaimTemplate,
|
|
oldObj: allocatedClaim,
|
|
newObj: nil,
|
|
wantHint: fwk.QueueSkip,
|
|
},
|
|
"backoff-wrong-new-object": {
|
|
pod: podWithClaimTemplate,
|
|
newObj: "not-a-claim",
|
|
wantErr: true,
|
|
},
|
|
"skip-wrong-claim": {
|
|
pod: podWithClaimTemplate,
|
|
newObj: func() *resourceapi.ResourceClaim {
|
|
claim := allocatedClaim.DeepCopy()
|
|
claim.OwnerReferences[0].UID += "123"
|
|
return claim
|
|
}(),
|
|
wantHint: fwk.QueueSkip,
|
|
},
|
|
"skip-unrelated-claim": {
|
|
pod: podWithClaimTemplate,
|
|
claims: []*resourceapi.ResourceClaim{allocatedClaim},
|
|
newObj: func() *resourceapi.ResourceClaim {
|
|
claim := allocatedClaim.DeepCopy()
|
|
claim.Name += "-foo"
|
|
claim.UID += "123"
|
|
return claim
|
|
}(),
|
|
wantHint: fwk.QueueSkip,
|
|
},
|
|
"queue-on-add": {
|
|
pod: podWithClaimName,
|
|
newObj: pendingClaim,
|
|
wantHint: fwk.Queue,
|
|
},
|
|
"backoff-wrong-old-object": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
oldObj: "not-a-claim",
|
|
newObj: pendingClaim,
|
|
wantErr: true,
|
|
},
|
|
"skip-adding-finalizer": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
oldObj: pendingClaim,
|
|
newObj: func() *resourceapi.ResourceClaim {
|
|
claim := pendingClaim.DeepCopy()
|
|
claim.Finalizers = append(claim.Finalizers, "foo")
|
|
return claim
|
|
}(),
|
|
wantHint: fwk.QueueSkip,
|
|
},
|
|
"queue-on-status-change": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim},
|
|
oldObj: pendingClaim,
|
|
newObj: func() *resourceapi.ResourceClaim {
|
|
claim := pendingClaim.DeepCopy()
|
|
claim.Status.Allocation = &resourceapi.AllocationResult{}
|
|
return claim
|
|
}(),
|
|
wantHint: fwk.Queue,
|
|
},
|
|
"claim-deallocate": {
|
|
pod: podWithClaimName,
|
|
claims: []*resourceapi.ResourceClaim{pendingClaim, otherAllocatedClaim},
|
|
oldObj: otherAllocatedClaim,
|
|
newObj: func() *resourceapi.ResourceClaim {
|
|
claim := otherAllocatedClaim.DeepCopy()
|
|
claim.Status.Allocation = nil
|
|
return claim
|
|
}(),
|
|
wantHint: fwk.Queue,
|
|
},
|
|
}
|
|
|
|
for name, tc := range testcases {
|
|
tCtx.SyncTest(name, func(tCtx ktesting.TContext) {
|
|
features := feature.Features{
|
|
EnableDRASchedulerFilterTimeout: true,
|
|
EnableDynamicResourceAllocation: true,
|
|
}
|
|
testCtx := setup(tCtx, nil, nil, tc.claims, nil, nil, features, false, nil)
|
|
oldObj := tc.oldObj
|
|
newObj := tc.newObj
|
|
if claim, ok := tc.newObj.(*resourceapi.ResourceClaim); ok {
|
|
// Add or update through the client and wait until the event is processed.
|
|
claimKey := claim.Namespace + "/" + claim.Name
|
|
if tc.oldObj == nil {
|
|
// Some test claims already have it. Clear for create.
|
|
createClaim := claim.DeepCopy()
|
|
createClaim.UID = ""
|
|
storedClaim, err := testCtx.client.ResourceV1().ResourceClaims(createClaim.Namespace).Create(tCtx, createClaim, metav1.CreateOptions{})
|
|
if err != nil {
|
|
tCtx.Fatalf("create claim: expected no error, got: %v", err)
|
|
}
|
|
claim = storedClaim
|
|
} else {
|
|
cachedClaim, err := testCtx.draManager.resourceClaimTracker.cache.Get(claimKey)
|
|
if err != nil {
|
|
tCtx.Fatalf("retrieve old claim: expected no error, got: %v", err)
|
|
}
|
|
updateClaim := claim.DeepCopy()
|
|
// The test claim doesn't have those (generated dynamically), so copy them.
|
|
updateClaim.UID = cachedClaim.(*resourceapi.ResourceClaim).UID
|
|
updateClaim.ResourceVersion = cachedClaim.(*resourceapi.ResourceClaim).ResourceVersion
|
|
|
|
storedClaim, err := testCtx.client.ResourceV1().ResourceClaims(updateClaim.Namespace).Update(tCtx, updateClaim, metav1.UpdateOptions{})
|
|
if err != nil {
|
|
tCtx.Fatalf("update claim: expected no error, got: %v", err)
|
|
}
|
|
claim = storedClaim
|
|
}
|
|
|
|
// Eventually the assume cache will have it, too.
|
|
tCtx.Wait()
|
|
cachedClaim, err := testCtx.draManager.resourceClaimTracker.cache.Get(claimKey)
|
|
tCtx.ExpectNoError(err, "retrieve claim")
|
|
if cachedClaim.(*resourceapi.ResourceClaim).ResourceVersion != claim.ResourceVersion {
|
|
tCtx.Errorf("cached claim not updated yet")
|
|
}
|
|
|
|
// This has the actual UID and ResourceVersion,
|
|
// which is relevant for
|
|
// isSchedulableAfterClaimChange.
|
|
newObj = claim
|
|
}
|
|
gotHint, err := testCtx.p.isSchedulableAfterClaimChange(tCtx.Logger(), tc.pod, oldObj, newObj)
|
|
if tc.wantErr {
|
|
if err == nil {
|
|
tCtx.Fatal("want an error, got none")
|
|
}
|
|
return
|
|
}
|
|
|
|
if err != nil {
|
|
tCtx.Fatalf("want no error, got: %v", err)
|
|
}
|
|
if tc.wantHint != gotHint {
|
|
tCtx.Fatalf("want %#v, got %#v", tc.wantHint.String(), gotHint.String())
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestIsSchedulableAfterPodChange(t *testing.T) {
|
|
testIsSchedulableAfterPodChange(ktesting.Init(t))
|
|
}
|
|
func testIsSchedulableAfterPodChange(tCtx ktesting.TContext) {
|
|
testcases := map[string]struct {
|
|
objs []apiruntime.Object
|
|
pod *v1.Pod
|
|
claims []*resourceapi.ResourceClaim
|
|
obj interface{}
|
|
wantHint fwk.QueueingHint
|
|
wantErr bool
|
|
}{
|
|
"backoff-wrong-new-object": {
|
|
pod: podWithClaimTemplate,
|
|
obj: "not-a-claim",
|
|
wantErr: true,
|
|
},
|
|
"complete": {
|
|
objs: []apiruntime.Object{pendingClaim},
|
|
pod: podWithClaimTemplate,
|
|
obj: podWithClaimTemplateInStatus,
|
|
wantHint: fwk.Queue,
|
|
},
|
|
"wrong-pod": {
|
|
objs: []apiruntime.Object{pendingClaim},
|
|
pod: func() *v1.Pod {
|
|
pod := podWithClaimTemplate.DeepCopy()
|
|
pod.Name += "2"
|
|
pod.UID += "2" // This is the relevant difference.
|
|
return pod
|
|
}(),
|
|
obj: podWithClaimTemplateInStatus,
|
|
wantHint: fwk.QueueSkip,
|
|
},
|
|
"missing-claim": {
|
|
objs: nil,
|
|
pod: podWithClaimTemplate,
|
|
obj: podWithClaimTemplateInStatus,
|
|
wantHint: fwk.QueueSkip,
|
|
},
|
|
"incomplete": {
|
|
objs: []apiruntime.Object{pendingClaim},
|
|
pod: podWithTwoClaimTemplates,
|
|
obj: func() *v1.Pod {
|
|
pod := podWithTwoClaimTemplates.DeepCopy()
|
|
// Only one of two claims created.
|
|
pod.Status.ResourceClaimStatuses = []v1.PodResourceClaimStatus{{
|
|
Name: pod.Spec.ResourceClaims[0].Name,
|
|
ResourceClaimName: &claimName,
|
|
}}
|
|
return pod
|
|
}(),
|
|
wantHint: fwk.QueueSkip,
|
|
},
|
|
}
|
|
|
|
for name, tc := range testcases {
|
|
tCtx.Run(name, func(tCtx ktesting.TContext) {
|
|
features := feature.Features{
|
|
EnableDRASchedulerFilterTimeout: true,
|
|
EnableDynamicResourceAllocation: true,
|
|
}
|
|
testCtx := setup(tCtx, nil, nil, tc.claims, nil, tc.objs, features, false, nil)
|
|
gotHint, err := testCtx.p.isSchedulableAfterPodChange(tCtx.Logger(), tc.pod, nil, tc.obj)
|
|
if tc.wantErr {
|
|
if err == nil {
|
|
tCtx.Fatal("want an error, got none")
|
|
}
|
|
return
|
|
}
|
|
|
|
if err != nil {
|
|
tCtx.Fatalf("want no error, got: %v", err)
|
|
}
|
|
if tc.wantHint != gotHint {
|
|
tCtx.Fatalf("want %#v, got %#v", tc.wantHint.String(), gotHint.String())
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// mockDeviceClassResolver is a simple mock implementation of fwk.DeviceClassResolver for testing
|
|
type mockDeviceClassResolver struct {
|
|
mapping map[v1.ResourceName]*resourceapi.DeviceClass
|
|
}
|
|
|
|
func (m *mockDeviceClassResolver) GetDeviceClass(resourceName v1.ResourceName) *resourceapi.DeviceClass {
|
|
return m.mapping[resourceName]
|
|
}
|
|
|
|
// TestAllocatorSelection covers the selection of a structured allocation implementation
|
|
// based on actual Kubernetes feature gates. This test lives here instead of
|
|
// k8s.io/dynamic-resource-allocation/structured because that code has no access
|
|
// to feature gate definitions.
|
|
func TestAllocatorSelection(t *testing.T) {
|
|
for name, tc := range map[string]struct {
|
|
features string
|
|
expectImplementation string
|
|
}{
|
|
// The most conservative implementation: only used when explicitly asking
|
|
// for the most stable Kubernetes (no alpha or beta features).
|
|
"only-GA": {
|
|
features: "AllAlpha=false,AllBeta=false",
|
|
expectImplementation: "stable",
|
|
},
|
|
|
|
// By default, some beta features are on and the incubating implementation
|
|
// is used.
|
|
"default": {
|
|
features: "",
|
|
expectImplementation: "incubating",
|
|
},
|
|
|
|
// Alpha features need the experimental implementation.
|
|
"alpha": {
|
|
features: "AllAlpha=true,AllBeta=true",
|
|
expectImplementation: "incubating",
|
|
},
|
|
} {
|
|
t.Run(name, func(t *testing.T) {
|
|
tCtx := ktesting.Init(t)
|
|
featureGate := utilfeature.DefaultFeatureGate.DeepCopy()
|
|
tCtx.ExpectNoError(featureGate.Set(tc.features), "set features")
|
|
fts := feature.NewSchedulerFeaturesFromGates(featureGate)
|
|
features := AllocatorFeatures(fts)
|
|
|
|
// Slightly hacky: most arguments are not valid and the constructor
|
|
// is expected to not use them yet.
|
|
allocator, err := structured.NewAllocator(tCtx, features, structured.AllocatedState{}, nil, nil, nil)
|
|
tCtx.ExpectNoError(err, "create allocator")
|
|
allocatorType := fmt.Sprintf("%T", allocator)
|
|
if !strings.Contains(allocatorType, tc.expectImplementation) {
|
|
tCtx.Fatalf("Expected allocator implementation %q, got %s", tc.expectImplementation, allocatorType)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
func Test_computesScore(t *testing.T) {
|
|
testcases := map[string]struct {
|
|
claims []*resourceapi.ResourceClaim
|
|
allocations nodeAllocation
|
|
expectedScore int64
|
|
expectErr bool
|
|
}{
|
|
"more-claims-than-allocations": {
|
|
claims: []*resourceapi.ResourceClaim{
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-1",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
).
|
|
Obj(),
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-2",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
).
|
|
Obj(),
|
|
},
|
|
allocations: nodeAllocation{},
|
|
expectErr: true,
|
|
},
|
|
"single-request-only-subrequest-allocated": {
|
|
claims: []*resourceapi.ResourceClaim{
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-1",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
).
|
|
Obj(),
|
|
},
|
|
allocations: nodeAllocation{
|
|
allocationResults: []resourceapi.AllocationResult{
|
|
{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Request: "req-1/subreq-1",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
expectedScore: 8,
|
|
},
|
|
"single-request-last-subrequest-allocated": {
|
|
claims: []*resourceapi.ResourceClaim{
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-1",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
st.SubRequest("subreq-2", className, 1),
|
|
st.SubRequest("subreq-3", className, 1),
|
|
st.SubRequest("subreq-4", className, 1),
|
|
st.SubRequest("subreq-5", className, 1),
|
|
st.SubRequest("subreq-6", className, 1),
|
|
st.SubRequest("subreq-7", className, 1),
|
|
st.SubRequest("subreq-8", className, 1),
|
|
).
|
|
Obj(),
|
|
},
|
|
allocations: nodeAllocation{
|
|
allocationResults: []resourceapi.AllocationResult{
|
|
{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Request: "req-1/subreq-8",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
expectedScore: 1,
|
|
},
|
|
"multiple-requests-with-middle-subrequests-allocated": {
|
|
claims: []*resourceapi.ResourceClaim{
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-1",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
st.SubRequest("subreq-2", className, 1),
|
|
st.SubRequest("subreq-3", className, 1),
|
|
st.SubRequest("subreq-4", className, 1),
|
|
).
|
|
NamedRequestWithPrioritizedList("req-2",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
st.SubRequest("subreq-2", className, 1),
|
|
st.SubRequest("subreq-3", className, 1),
|
|
st.SubRequest("subreq-4", className, 1),
|
|
st.SubRequest("subreq-5", className, 1),
|
|
).
|
|
Obj(),
|
|
},
|
|
allocations: nodeAllocation{
|
|
allocationResults: []resourceapi.AllocationResult{
|
|
{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Request: "req-1/subreq-4",
|
|
},
|
|
{
|
|
Request: "req-2/subreq-5",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
expectedScore: 9,
|
|
},
|
|
"multiple-requests-with-top-subrequests-allocated": {
|
|
claims: []*resourceapi.ResourceClaim{
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-1",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
st.SubRequest("subreq-2", className, 1),
|
|
st.SubRequest("subreq-3", className, 1),
|
|
st.SubRequest("subreq-4", className, 1),
|
|
st.SubRequest("subreq-5", className, 1),
|
|
st.SubRequest("subreq-6", className, 1),
|
|
st.SubRequest("subreq-7", className, 1),
|
|
st.SubRequest("subreq-8", className, 1),
|
|
).
|
|
NamedRequestWithPrioritizedList("req-2",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
).
|
|
Obj(),
|
|
},
|
|
allocations: nodeAllocation{
|
|
allocationResults: []resourceapi.AllocationResult{
|
|
{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Request: "req-1/subreq-8",
|
|
},
|
|
{
|
|
Request: "req-2/subreq-1",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
expectedScore: 9,
|
|
},
|
|
"multiple-claims-with-last-subrequests-allocated": {
|
|
claims: []*resourceapi.ResourceClaim{
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-1",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
st.SubRequest("subreq-2", className, 1),
|
|
st.SubRequest("subreq-3", className, 1),
|
|
st.SubRequest("subreq-4", className, 1),
|
|
st.SubRequest("subreq-5", className, 1),
|
|
st.SubRequest("subreq-6", className, 1),
|
|
st.SubRequest("subreq-7", className, 1),
|
|
st.SubRequest("subreq-8", className, 1),
|
|
).
|
|
Obj(),
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-2",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
st.SubRequest("subreq-2", className, 1),
|
|
st.SubRequest("subreq-3", className, 1),
|
|
st.SubRequest("subreq-4", className, 1),
|
|
st.SubRequest("subreq-5", className, 1),
|
|
st.SubRequest("subreq-6", className, 1),
|
|
st.SubRequest("subreq-7", className, 1),
|
|
st.SubRequest("subreq-8", className, 1),
|
|
).
|
|
Obj(),
|
|
},
|
|
allocations: nodeAllocation{
|
|
allocationResults: []resourceapi.AllocationResult{
|
|
{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Request: "req-1/subreq-8",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Request: "req-2/subreq-8",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
expectedScore: 2,
|
|
},
|
|
"multiple-claims-with-top-subrequests-allocated": {
|
|
claims: []*resourceapi.ResourceClaim{
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-1",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
).
|
|
Obj(),
|
|
st.MakeResourceClaim().
|
|
NamedRequestWithPrioritizedList("req-2",
|
|
st.SubRequest("subreq-1", className, 1),
|
|
).
|
|
Obj(),
|
|
},
|
|
allocations: nodeAllocation{
|
|
allocationResults: []resourceapi.AllocationResult{
|
|
{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Request: "req-1/subreq-1",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
{
|
|
Devices: resourceapi.DeviceAllocationResult{
|
|
Results: []resourceapi.DeviceRequestAllocationResult{
|
|
{
|
|
Request: "req-2/subreq-1",
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
},
|
|
expectedScore: 16,
|
|
},
|
|
}
|
|
|
|
for name, tc := range testcases {
|
|
t.Run(name, func(t *testing.T) {
|
|
iterator := slices.All(tc.claims)
|
|
score, err := computeScore(iterator, tc.allocations)
|
|
if err != nil {
|
|
if !tc.expectErr {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
return
|
|
}
|
|
if tc.expectErr {
|
|
t.Fatal("expected error, got none")
|
|
}
|
|
assert.Equal(t, tc.expectedScore, score)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestNormalizeScore(t *testing.T) {
|
|
testcases := map[string]struct {
|
|
scores fwk.NodeScoreList
|
|
expectedScores fwk.NodeScoreList
|
|
}{
|
|
"empty": {
|
|
scores: fwk.NodeScoreList{},
|
|
expectedScores: fwk.NodeScoreList{},
|
|
},
|
|
"single-score": {
|
|
scores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: 42,
|
|
},
|
|
},
|
|
expectedScores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: 100,
|
|
},
|
|
},
|
|
},
|
|
"all-same": {
|
|
scores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: 8,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: 8,
|
|
},
|
|
},
|
|
expectedScores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: 100,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: 100,
|
|
},
|
|
},
|
|
},
|
|
"all-same-very-large": {
|
|
scores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: math.MaxInt32,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: math.MaxInt32,
|
|
},
|
|
},
|
|
expectedScores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: 100,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: 100,
|
|
},
|
|
},
|
|
},
|
|
"max-and-min-values": {
|
|
scores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: math.MaxInt32,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: 0,
|
|
},
|
|
},
|
|
expectedScores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: 100,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: 0,
|
|
},
|
|
},
|
|
},
|
|
"mid-value": {
|
|
scores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: 99,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: 98,
|
|
},
|
|
{
|
|
Name: "node-3",
|
|
Score: 97,
|
|
},
|
|
},
|
|
expectedScores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: 100,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: 98,
|
|
},
|
|
{
|
|
Name: "node-3",
|
|
Score: 97,
|
|
},
|
|
},
|
|
},
|
|
"large-spread-lost-precision": {
|
|
scores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: math.MaxInt32,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: math.MaxInt32 - 1,
|
|
},
|
|
{
|
|
Name: "node-3",
|
|
Score: 1,
|
|
},
|
|
{
|
|
Name: "node-4",
|
|
Score: 0,
|
|
},
|
|
},
|
|
expectedScores: fwk.NodeScoreList{
|
|
{
|
|
Name: "node-1",
|
|
Score: 100,
|
|
},
|
|
{
|
|
Name: "node-2",
|
|
Score: 99,
|
|
},
|
|
{
|
|
Name: "node-3",
|
|
Score: 0,
|
|
},
|
|
{
|
|
Name: "node-4",
|
|
Score: 0,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
|
|
for name, tc := range testcases {
|
|
t.Run(name, func(t *testing.T) {
|
|
pl := &DynamicResources{
|
|
enabled: true,
|
|
}
|
|
scores := tc.scores
|
|
_ = pl.NormalizeScore(context.Background(), nil, nil, scores)
|
|
assert.Equal(t, tc.expectedScores, scores)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestGatherAllocatedState(t *testing.T) {
|
|
tCtx := ktesting.Init(t)
|
|
testGatherAllocatedState(tCtx)
|
|
}
|
|
func testGatherAllocatedState(tCtx ktesting.TContext) {
|
|
testcases := map[string]struct {
|
|
allocatedResourceClaims []*resourceapi.ResourceClaim
|
|
inflightResourceClaims map[types.UID]*resourceapi.ResourceClaim
|
|
enabledConsumableCapacity bool
|
|
expectErr bool
|
|
expectedIDAllocated int
|
|
expectedSharedIDAllocated int
|
|
expectedConsumedCapacity string
|
|
}{
|
|
"no-claims": {
|
|
expectedIDAllocated: 0,
|
|
expectedSharedIDAllocated: 0,
|
|
},
|
|
"single-allocated-claim": {
|
|
allocatedResourceClaims: []*resourceapi.ResourceClaim{
|
|
allocatedClaim,
|
|
},
|
|
expectedIDAllocated: 1,
|
|
expectedSharedIDAllocated: 0,
|
|
},
|
|
"single-allocated-claim-with-shared-device": {
|
|
enabledConsumableCapacity: true,
|
|
allocatedResourceClaims: []*resourceapi.ResourceClaim{
|
|
allocatedClaimWithSharedDevice,
|
|
},
|
|
expectedIDAllocated: 0,
|
|
expectedSharedIDAllocated: 1,
|
|
},
|
|
"single-allocated-claim-with-capacity": {
|
|
enabledConsumableCapacity: true,
|
|
allocatedResourceClaims: []*resourceapi.ResourceClaim{
|
|
allocatedClaimWithConsumedCapacity,
|
|
},
|
|
expectedIDAllocated: 0,
|
|
expectedSharedIDAllocated: 1,
|
|
expectedConsumedCapacity: "1",
|
|
},
|
|
"disabled-single-allocated-claim-with-capacity": {
|
|
enabledConsumableCapacity: false,
|
|
allocatedResourceClaims: []*resourceapi.ResourceClaim{
|
|
allocatedClaimWithConsumedCapacity,
|
|
},
|
|
expectedIDAllocated: 1,
|
|
expectedSharedIDAllocated: 0,
|
|
expectedConsumedCapacity: "",
|
|
},
|
|
"mixed-allocated-claim": {
|
|
enabledConsumableCapacity: true,
|
|
allocatedResourceClaims: []*resourceapi.ResourceClaim{
|
|
allocatedClaim,
|
|
allocatedClaimWithConsumedCapacity,
|
|
},
|
|
expectedIDAllocated: 1,
|
|
expectedSharedIDAllocated: 1,
|
|
expectedConsumedCapacity: "1",
|
|
},
|
|
"add-inflight-allocated-claim-with-capacity": {
|
|
enabledConsumableCapacity: true,
|
|
allocatedResourceClaims: []*resourceapi.ResourceClaim{
|
|
allocatedClaim,
|
|
allocatedClaimWithConsumedCapacity,
|
|
},
|
|
inflightResourceClaims: map[types.UID]*resourceapi.ResourceClaim{
|
|
"claim-2-uid": allocatedClaimWithConsumedCapacity2,
|
|
},
|
|
expectedIDAllocated: 1,
|
|
expectedSharedIDAllocated: 2,
|
|
expectedConsumedCapacity: "2",
|
|
},
|
|
"disabled-inflight-allocated-claim-with-capacity": {
|
|
enabledConsumableCapacity: false,
|
|
allocatedResourceClaims: []*resourceapi.ResourceClaim{
|
|
allocatedClaim,
|
|
allocatedClaimWithConsumedCapacity,
|
|
},
|
|
inflightResourceClaims: map[types.UID]*resourceapi.ResourceClaim{
|
|
"claim-2-uid": allocatedClaimWithConsumedCapacity2,
|
|
},
|
|
expectedIDAllocated: 2,
|
|
expectedSharedIDAllocated: 0,
|
|
expectedConsumedCapacity: "",
|
|
},
|
|
}
|
|
for name, tc := range testcases {
|
|
tCtx.Run(name, func(tCtx ktesting.TContext) {
|
|
featuregatetesting.SetFeatureGateDuringTest(tCtx, utilfeature.DefaultFeatureGate, features.DRAConsumableCapacity, tc.enabledConsumableCapacity)
|
|
|
|
tCtx.Helper()
|
|
logger := klog.FromContext(tCtx)
|
|
draManager := &DefaultDRAManager{
|
|
resourceClaimTracker: &claimTracker{
|
|
inFlightAllocations: &sync.Map{},
|
|
allocatedDevices: newAllocatedDevices(logger),
|
|
},
|
|
}
|
|
for _, obj := range tc.allocatedResourceClaims {
|
|
draManager.resourceClaimTracker.allocatedDevices.handlers().OnAdd(obj, false)
|
|
}
|
|
if tc.inflightResourceClaims != nil {
|
|
for claimUID, obj := range tc.inflightResourceClaims {
|
|
err := draManager.resourceClaimTracker.SignalClaimPendingAllocation(claimUID, obj)
|
|
if err != nil {
|
|
if !tc.expectErr {
|
|
tCtx.Fatalf("unexpected error: %v", err)
|
|
return
|
|
}
|
|
}
|
|
if tc.expectErr {
|
|
tCtx.Fatal("expected error, got none")
|
|
}
|
|
}
|
|
}
|
|
|
|
// Start the test from here
|
|
allocatedState, err := draManager.ResourceClaims().GatherAllocatedState()
|
|
if err != nil {
|
|
if !tc.expectErr {
|
|
tCtx.Fatalf("unexpected error: %v", err)
|
|
return
|
|
}
|
|
}
|
|
if tc.expectErr {
|
|
tCtx.Fatal("expected error, got none")
|
|
}
|
|
allocatedDeviceIDs := allocatedState.AllocatedDevices
|
|
allocatedSharedDeviceIDs := allocatedState.AllocatedSharedDeviceIDs
|
|
aggregatedCapacity := allocatedState.AggregatedCapacity
|
|
|
|
// Verify the counts match expectations
|
|
if allocatedDeviceIDs.Len() != tc.expectedIDAllocated {
|
|
tCtx.Errorf("expected %d allocated device IDs, got %d", tc.expectedIDAllocated, allocatedDeviceIDs.Len())
|
|
}
|
|
if allocatedSharedDeviceIDs.Len() != tc.expectedSharedIDAllocated {
|
|
tCtx.Errorf("expected %d allocated shared device IDs, got %d", tc.expectedSharedIDAllocated, allocatedSharedDeviceIDs.Len())
|
|
}
|
|
|
|
// Verify aggregated capacity is initialized
|
|
if aggregatedCapacity == nil {
|
|
tCtx.Error("aggregatedCapacity should not be nil")
|
|
}
|
|
if tc.expectedConsumedCapacity != "" {
|
|
if len(aggregatedCapacity) == 0 {
|
|
tCtx.Errorf("expected consumed capacity, got empty")
|
|
}
|
|
deviceID := schedulerapi.MakeDeviceID(driver, nodeName, sharedDeviceName)
|
|
capacity := aggregatedCapacity[deviceID]
|
|
if capacity == nil {
|
|
tCtx.Errorf("expected aggregated capacity of %s, got nil", deviceID)
|
|
return
|
|
}
|
|
value := capacity[capacityName]
|
|
if value == nil {
|
|
tCtx.Errorf("expected value of %s, got nil", capacityName)
|
|
return
|
|
}
|
|
if value.Cmp(apiresource.MustParse(tc.expectedConsumedCapacity)) != 0 {
|
|
tCtx.Errorf("expected value of %s to be %s, got %s", capacityName, tc.expectedConsumedCapacity, value)
|
|
}
|
|
} else if len(aggregatedCapacity) > 0 {
|
|
tCtx.Errorf("got unexpected consumed capacity")
|
|
}
|
|
})
|
|
}
|
|
|
|
}
|