Merge pull request #134882 from yliaog/initcon

Fix non-sidecar init container device requests
This commit is contained in:
Kubernetes Prow Robot 2025-11-05 21:57:04 -08:00 committed by GitHub
commit 7537d52c2e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 616 additions and 198 deletions

View file

@ -6020,6 +6020,7 @@ func validatePodExtendedResourceClaimStatus(status *core.PodExtendedResourceClai
type key struct {
container string
resource string
request string
}
seen := map[key]struct{}{}
for i, rm := range status.RequestMappings {
@ -6033,10 +6034,11 @@ func validatePodExtendedResourceClaimStatus(status *core.PodExtendedResourceClai
allErrs = append(allErrs, field.Invalid(idxPath.Child("containerName"), rm.ContainerName, "must match the name of an entry in spec.initContainers.name or spec.containers.name"))
}
allErrs = append(allErrs, ValidateDNS1123Label(rm.RequestName, fldPath.Child("requestName"))...)
k := key{container: rm.ContainerName, resource: rm.ResourceName}
k := key{container: rm.ContainerName, resource: rm.ResourceName, request: rm.RequestName}
if _, ok := seen[k]; ok {
allErrs = append(allErrs, field.Duplicate(idxPath.Child("containerName"), rm.ContainerName))
allErrs = append(allErrs, field.Duplicate(idxPath.Child("resourceName"), rm.ResourceName))
allErrs = append(allErrs, field.Duplicate(idxPath.Child("requestName"), rm.RequestName))
}
seen[k] = struct{}{}
}

View file

@ -15284,7 +15284,37 @@ func TestValidatePodStatusUpdate(t *testing.T) {
),
old: *podtest.MakePod("foo"),
err: "Duplicate value: \"ctr\"",
test: "invalid container name and extended resource name in requestMapping in ExtendedResourceClaimStatus",
test: "invalid duplicate container name,, extended resource name, and request name in requestMapping in ExtendedResourceClaimStatus",
}, {
new: *podtest.MakePod("foo",
podtest.SetContainers(podtest.MakeContainer("ctr", podtest.SetContainerResources(
podtest.MakeResourceRequirements(
map[string]string{
string("example.com/gpu"): "1",
},
map[string]string{
string("example.com/gpu"): "1",
})))),
podtest.SetStatus(core.PodStatus{
ExtendedResourceClaimStatus: &core.PodExtendedResourceClaimStatus{
ResourceClaimName: "xyz",
RequestMappings: []core.ContainerExtendedResourceRequest{
{
ContainerName: "ctr",
ResourceName: "example.com/gpu",
RequestName: "container-0-request-0",
},
{
ContainerName: "ctr",
ResourceName: "example.com/gpu",
RequestName: "container-1-request-0",
},
},
},
}),
),
old: *podtest.MakePod("foo"),
test: "valid duplicate container name,, extended resource name in requestMapping in ExtendedResourceClaimStatus",
}, {
new: *podtest.MakePod("foo",
podtest.SetContainers(podtest.MakeContainer("ctr", podtest.SetContainerResources(

View file

@ -527,17 +527,13 @@ func (m *Manager) GetResources(pod *v1.Pod, container *v1.Container) (*Container
continue
}
if schedutil.IsDRAExtendedResourceName(rName) {
requestName := ""
for _, rm := range pod.Status.ExtendedResourceClaimStatus.RequestMappings {
// allow multiple device requests per container per resource.
if rm.ContainerName == container.Name && rm.ResourceName == rName.String() {
requestName = rm.RequestName
break
// As of Kubernetes 1.31, CDI device IDs are not passed via annotations anymore.
cdiDevices = append(cdiDevices, claimInfo.cdiDevicesAsList(rm.RequestName)...)
}
}
if requestName != "" {
// As of Kubernetes 1.31, CDI device IDs are not passed via annotations anymore.
cdiDevices = append(cdiDevices, claimInfo.cdiDevicesAsList(requestName)...)
}
}
}
return nil

View file

@ -156,6 +156,9 @@ type nodeAllocation struct {
// extendedResourceClaim has the special claim for extended resource backed by DRA
// created during Filter for the nodes.
extendedResourceClaim *resourceapi.ResourceClaim
// containerResourceRequestMappings has the container, extended resource, and device request mappings
// calculated at the Filter phase, and used at the PreBind phase.
containerResourceRequestMappings []v1.ContainerExtendedResourceRequest
}
// DynamicResources is a plugin that ensures that ResourceClaims are allocated.
@ -729,17 +732,17 @@ func getStateData(cs fwk.CycleState) (*stateData, error) {
// It returns an error when the pod's extended resource requests cannot be allocated
// from node's Allocatable, nor matching any device class's explicit or implicit
// ExtendedResourceName.
func (pl *DynamicResources) filterExtendedResources(state *stateData, pod *v1.Pod, nodeInfo fwk.NodeInfo, logger klog.Logger) (*resourceapi.ResourceClaim, *fwk.Status) {
func (pl *DynamicResources) filterExtendedResources(state *stateData, pod *v1.Pod, nodeInfo fwk.NodeInfo, logger klog.Logger) (*resourceapi.ResourceClaim, []v1.ContainerExtendedResourceRequest, *fwk.Status) {
extendedResourceClaim := state.claims.extendedResourceClaim()
if extendedResourceClaim == nil {
// Nothing to do.
return nil, nil
return nil, nil, nil
}
// The claim is from the prior scheduling cycle, return unschedulable such that it can be
// deleted at the PostFilter phase, and retry anew.
if extendedResourceClaim.Spec.Devices.Requests != nil {
return nil, statusUnschedulable(logger, "cannot schedule extended resource claim", "pod", klog.KObj(pod), "node", klog.KObj(nodeInfo.Node()), "claim", klog.KObj(extendedResourceClaim))
return nil, nil, statusUnschedulable(logger, "cannot schedule extended resource claim", "pod", klog.KObj(pod), "node", klog.KObj(nodeInfo.Node()), "claim", klog.KObj(extendedResourceClaim))
}
extendedResources := make(map[v1.ResourceName]int64)
@ -755,22 +758,16 @@ func (pl *DynamicResources) filterExtendedResources(state *stateData, pod *v1.Po
}
allocatable, okScalar := nodeInfo.GetAllocatable().GetScalarResources()[rName]
isBackedByDRA := cache.GetDeviceClass(rName) != nil
if isBackedByDRA {
if allocatable > 0 {
// node provides the resource via device plugin
extendedResources[rName] = 0
} else {
// node needs to provide the resource via DRA
extendedResources[rName] = rQuant
hasExtendedResource = true
}
if isBackedByDRA && allocatable == 0 {
// node needs to provide the resource via DRA
extendedResources[rName] = rQuant
hasExtendedResource = true
} else if !okScalar {
// has request neither provided by device plugin, nor backed by DRA,
// hence the pod does not fit the node.
return nil, statusUnschedulable(logger, "cannot fit resource", "pod", klog.KObj(pod), "node", klog.KObj(nodeInfo.Node()), "resource", rName)
return nil, nil, statusUnschedulable(logger, "cannot fit resource", "pod", klog.KObj(pod), "node", klog.KObj(nodeInfo.Node()), "resource", rName)
}
}
// No extended resources backed by DRA on this node.
// The pod may have extended resources, but they are all backed by device
// plugin, hence the noderesources plugin should have checked if the node
@ -779,93 +776,23 @@ func (pl *DynamicResources) filterExtendedResources(state *stateData, pod *v1.Po
if state.claims.noUserClaim() && !hasExtendedResource {
// It cannot be allocated when reaching here, as the claim from prior scheduling cycle
// would return unschedulable earlier in this function.
return nil, nil
return nil, nil, nil
}
// Each node needs its own, potentially different variant of the claim.
nodeExtendedResourceClaim := extendedResourceClaim.DeepCopy()
nodeExtendedResourceClaim.Spec.Devices.Requests = createDeviceRequests(pod, extendedResources, cache)
if extendedResourceClaim.Status.Allocation != nil {
// If it is already allocated, then we cannot simply allocate it again.
//
// It cannot be allocated when reaching here, as the claim found from prior scheduling cycle
// would return unschedulable earlier in this function.
return nil, nil
return nil, nil, nil
}
return nodeExtendedResourceClaim, nil
}
// Each node needs its own, potentially different variant of the claim.
nodeExtendedResourceClaim := extendedResourceClaim.DeepCopy()
reqs, mappings := createRequestsAndMappings(pod, extendedResources, logger, cache)
nodeExtendedResourceClaim.Spec.Devices.Requests = reqs
// createDeviceRequests computes the special claim's Requests based on the pod's extended resources
// that are not satisfied by the node's Allocatable.
//
// the device request name has the format: container-%d-request-%d,
// the first %d is the container's index in the pod's initContainer and containers
// the second %d is the extended resource's index in that container's sorted resource requests.
func createDeviceRequests(pod *v1.Pod, extendedResources map[v1.ResourceName]int64, cache fwk.DeviceClassResolver) []resourceapi.DeviceRequest {
var deviceRequests []resourceapi.DeviceRequest
// Creating the extended resource claim's Requests by
// iterating over the containers, and the resources in the containers,
// and create one request per <container, extended resource>.
// pod level resources currently have only cpu and memory, they are not considered here for now.
// if extended resources are added to pod level resources in the future, they need to be
// supported separately.
containers := slices.Clone(pod.Spec.InitContainers)
containers = append(containers, pod.Spec.Containers...)
for r := range extendedResources {
for i, c := range containers {
creqs := c.Resources.Requests
if creqs == nil {
continue
}
var rQuant resource.Quantity
var ok bool
if rQuant, ok = creqs[r]; !ok {
continue
}
crq, ok := (&rQuant).AsInt64()
if !ok || crq == 0 {
continue
}
class := cache.GetDeviceClass(r)
// skip if the request does not map to a device class
if class == nil {
continue
}
keys := make([]string, 0, len(creqs))
for k := range creqs {
keys = append(keys, k.String())
}
// resource requests in a container is a map, their names must
// be sorted to determine the resource's index order.
slice.SortStrings(keys)
ridx := 0
for j := range keys {
if keys[j] == r.String() {
ridx = j
break
}
}
// i is the index of the container if the list of initContainers + containers.
// ridx is the index of the extended resource request in the sorted all requests in the container.
// crq is the quantity of the extended resource request.
deviceRequests = append(deviceRequests,
resourceapi.DeviceRequest{
Name: fmt.Sprintf("container-%d-request-%d", i, ridx), // need to be container name index - extended resource name index
Exactly: &resourceapi.ExactDeviceRequest{
DeviceClassName: class.Name, // map external resource name -> device class name
AllocationMode: resourceapi.DeviceAllocationModeExactCount,
Count: crq,
},
})
}
}
sort.Slice(deviceRequests, func(i, j int) bool {
return deviceRequests[i].Name < deviceRequests[j].Name
})
return deviceRequests
return nodeExtendedResourceClaim, mappings, nil
}
// Filter invoked at the filter extension point.
@ -891,7 +818,7 @@ func (pl *DynamicResources) Filter(ctx context.Context, cs fwk.CycleState, pod *
logger := klog.FromContext(ctx)
node := nodeInfo.Node()
nodeExtendedResourceClaim, status := pl.filterExtendedResources(state, pod, nodeInfo, logger)
nodeExtendedResourceClaim, containerResourceRequestMappings, status := pl.filterExtendedResources(state, pod, nodeInfo, logger)
if status != nil {
return status
}
@ -1006,8 +933,9 @@ func (pl *DynamicResources) Filter(ctx context.Context, cs fwk.CycleState, pod *
if state.allocator != nil {
state.nodeAllocations[node.Name] = nodeAllocation{
allocationResults: allocations,
extendedResourceClaim: nodeExtendedResourceClaim,
allocationResults: allocations,
extendedResourceClaim: nodeExtendedResourceClaim,
containerResourceRequestMappings: containerResourceRequestMappings,
}
}
@ -1429,55 +1357,167 @@ func (pl *DynamicResources) PreBindPreFlight(ctx context.Context, cs fwk.CycleSt
return nil
}
// createRequestMappings creates the requestMappings for the special extended resource claim.
// For each device request in the claim, it finds the container name, and
// the extended resource name in that container matching the device request.
// the device request name has the format: container-%d-request-%d,
// the first %d is the container's index in the pod's initContainer and containers
// the second %d is the extended resource's index in that container's sorted resource requests.
func createRequestMappings(claim *resourceapi.ResourceClaim, pod *v1.Pod) []v1.ContainerExtendedResourceRequest {
var cer []v1.ContainerExtendedResourceRequest
deviceReqNames := make([]string, 0, len(claim.Spec.Devices.Requests))
for _, r := range claim.Spec.Devices.Requests {
deviceReqNames = append(deviceReqNames, r.Name)
// partitionContainerIndices splits the indices of containers into two groups
// and returns them as (long-lived, short-lived), each in ascending index order.
//
// The first numInitContainers entries of containers are treated as init
// containers. An init container whose RestartPolicy is set to
// v1.ContainerRestartPolicyAlways counts as a sidecar. Short-lived indices are
// the init containers that are not sidecars; every other index (sidecars and
// all entries at or beyond numInitContainers) is long-lived.
//
// Pod level resources currently have only cpu and memory, they are not
// considered here for now. If extended resources are added to pod level
// resources in the future, they need to be supported separately.
func partitionContainerIndices(containers []v1.Container, numInitContainers int) ([]int, []int) {
	longLived := make([]int, 0, len(containers))
	shortLived := make([]int, 0, numInitContainers)
	for idx := range containers {
		policy := containers[idx].RestartPolicy
		sidecar := policy != nil && *policy == v1.ContainerRestartPolicyAlways
		if idx >= numInitContainers || sidecar {
			// Regular container, or an init container that keeps running.
			longLived = append(longLived, idx)
		} else {
			// Init container that runs to completion.
			shortLived = append(shortLived, idx)
		}
	}
	return longLived, shortLived
}
// createResourceRequestAndMappings computes, for a single container and a
// single extended resource, the extra device request the container needs and
// the container→request mappings that describe where its devices come from.
//
// containerIndex is the number embedded in the generated request name; callers
// are expected to pass the container's index in the list of
// initContainers + containers. className is the device class name stored in
// the generated request.
//
// reusableRequests lists DeviceRequests made by other containers that this
// container may draw on before requesting devices of its own. Entries may be
// nil and are skipped; non-nil entries must have Exactly set, since it is
// dereferenced without a check.
//
// Results:
//   - (nil, nil) when the container has no request for rName, or when the
//     requested quantity is zero or AsInt64 cannot convert it.
//   - (nil, mappings) when the reusable requests alone cover the quantity; the
//     mappings reference every reusable request consumed up to that point.
//   - (request, mappings) otherwise, where request asks for the devices still
//     missing and the mappings additionally reference the new request.
func createResourceRequestAndMappings(containerIndex int, container *v1.Container, rName v1.ResourceName, className string, reusableRequests []*resourceapi.DeviceRequest) (*resourceapi.DeviceRequest, []v1.ContainerExtendedResourceRequest) {
	requested := container.Resources.Requests
	// Indexing a nil map reports "not found", so this also covers a container
	// without any resource requests.
	quantity, found := requested[rName]
	if !found {
		return nil, nil
	}
	wanted, converted := quantity.AsInt64()
	if !converted || wanted == 0 {
		return nil, nil
	}

	resourceName := rName.String()
	var mappings []v1.ContainerExtendedResourceRequest

	// Consume reusable requests first, recording a mapping for each one used.
	covered := int64(0)
	for _, reusable := range reusableRequests {
		if reusable == nil {
			continue
		}
		covered += reusable.Exactly.Count
		mappings = append(mappings, v1.ContainerExtendedResourceRequest{
			ContainerName: container.Name,
			ResourceName:  resourceName,
			RequestName:   reusable.Name,
		})
		if covered >= wanted {
			// Fully satisfied by other containers' requests.
			return nil, mappings
		}
	}

	// Resource requests in a container are a map, so their names are sorted
	// to give the resource a stable index within the container.
	names := make([]string, 0, len(requested))
	for name := range requested {
		names = append(names, name.String())
	}
	slice.SortStrings(names)
	position := 0
	for idx, name := range names {
		if name == resourceName {
			position = idx
			break
		}
	}

	// The request name encodes the container index and the position of the
	// extended resource among the container's sorted resource names.
	requestName := fmt.Sprintf("container-%d-request-%d", containerIndex, position)
	mappings = append(mappings, v1.ContainerExtendedResourceRequest{
		ContainerName: container.Name,
		ResourceName:  resourceName,
		RequestName:   requestName,
	})
	return &resourceapi.DeviceRequest{
		Name: requestName,
		Exactly: &resourceapi.ExactDeviceRequest{
			DeviceClassName: className,
			AllocationMode:  resourceapi.DeviceAllocationModeExactCount,
			Count:           wanted - covered, // only the devices not already covered
		},
	}, mappings
}
func createRequestsAndMappings(pod *v1.Pod, extendedResources map[v1.ResourceName]int64, logger klog.Logger, deviceClassMapping fwk.DeviceClassResolver) ([]resourceapi.DeviceRequest, []v1.ContainerExtendedResourceRequest) {
containers := slices.Clone(pod.Spec.InitContainers)
containers = append(containers, pod.Spec.Containers...)
for i, c := range containers {
creqs := c.Resources.Requests
keys := make([]string, 0, len(creqs))
for k := range creqs {
keys = append(keys, k.String())
longLivedContainerIndices, shortLivedInitContainerIndices := partitionContainerIndices(containers, len(pod.Spec.InitContainers))
// all requests across all containers and resource types
var deviceRequests []resourceapi.DeviceRequest
// all mappings across all containers and resource types
var mappings []v1.ContainerExtendedResourceRequest
for resource := range extendedResources {
class := deviceClassMapping.GetDeviceClass(resource)
// skip if the resource does not map to a device class
if class == nil {
continue
}
// resource requests in a container is a map, their names must
// be sorted to determine the resource's index order.
slice.SortStrings(keys)
for rName := range creqs {
ridx := 0
for j := range keys {
if keys[j] == rName.String() {
ridx = j
break
// shortLivedResourceMappings is the mapping of container+resource→request for short lived containers (init non-sidecar container)
var shortLivedResourceMappings []v1.ContainerExtendedResourceRequest
// longLivedResourceMappings is the mapping of container+resource→request for long lived containers (init sidecar or regular container)
var longLivedResourceMappings []v1.ContainerExtendedResourceRequest
// longLivedResourceRequests is the list of requests for a given resource by long-lived containers.
// The length of this list is the same as the length of containers.
// Entries may be nil if the container at that index did not produce a request for that resource.
// Requests at later indices are reusable by non-sidecar initContainers at earlier indices.
longLivedResourceRequests := make([]*resourceapi.DeviceRequest, len(containers))
for _, i := range longLivedContainerIndices {
containerRequest, containerMappings := createResourceRequestAndMappings(i, &containers[i], resource, class.Name, nil)
longLivedResourceRequests[i] = containerRequest // might be nil
longLivedResourceMappings = append(longLivedResourceMappings, containerMappings...) // might be zero-length
}
// maxShortLivedResourceRequest is the maximum request for a given resource by short-lived containers
var maxShortLivedResourceRequest *resourceapi.DeviceRequest
// shortLivedRequestNames is all request names for a given resource by short-lived containers. All mappings to any name in
// this set will be replaced by maxShortLivedResourceRequest.Name.
shortLivedRequestNames := sets.New[string]()
for _, i := range shortLivedInitContainerIndices {
containerRequest, containerMappings := createResourceRequestAndMappings(i, &containers[i], resource, class.Name, longLivedResourceRequests[i:])
if containerRequest != nil {
shortLivedRequestNames.Insert(containerRequest.Name)
if maxShortLivedResourceRequest == nil || maxShortLivedResourceRequest.Exactly.Count < containerRequest.Exactly.Count {
maxShortLivedResourceRequest = containerRequest
}
}
for _, devReqName := range deviceReqNames {
// During filter phase, device request name is set to be
// container name index "-" extended resource name index
if fmt.Sprintf("container-%d-request-%d", i, ridx) == devReqName {
cer = append(cer,
v1.ContainerExtendedResourceRequest{
ContainerName: c.Name,
ResourceName: rName.String(),
RequestName: devReqName,
})
shortLivedResourceMappings = append(shortLivedResourceMappings, containerMappings...) // might be zero-length
}
// rewrite mappings to short-lived requests to use the maximum short-lived request name
if maxShortLivedResourceRequest != nil && len(shortLivedRequestNames) > 1 {
shortLivedRequestNames.Delete(maxShortLivedResourceRequest.Name)
for i := range shortLivedResourceMappings {
if shortLivedRequestNames.Has(shortLivedResourceMappings[i].RequestName) {
shortLivedResourceMappings[i].RequestName = maxShortLivedResourceRequest.Name
}
}
}
// append non-nil requests
if maxShortLivedResourceRequest != nil {
deviceRequests = append(deviceRequests, *maxShortLivedResourceRequest)
}
for _, request := range longLivedResourceRequests {
if request != nil {
deviceRequests = append(deviceRequests, *request)
}
}
// append mappings
mappings = append(mappings, longLivedResourceMappings...)
mappings = append(mappings, shortLivedResourceMappings...)
}
return cer
sort.Slice(deviceRequests, func(i, j int) bool {
return deviceRequests[i].Name < deviceRequests[j].Name
})
return deviceRequests, mappings
}
// bindClaim gets called by PreBind for claim which is not reserved for the pod yet.
@ -1636,7 +1676,14 @@ func (pl *DynamicResources) bindClaim(ctx context.Context, state *stateData, ind
// Patch the pod status with the new information about the generated
// special resource claim.
if isExtendedResourceClaim {
cer := createRequestMappings(claim, pod)
var cer []v1.ContainerExtendedResourceRequest
if nodeAllocation, ok := state.nodeAllocations[nodeName]; ok {
cer = nodeAllocation.containerResourceRequestMappings
}
if len(cer) == 0 {
return nil, fmt.Errorf("nil or empty request mappings, no update of pod %s/%s ExtendedResourceClaimStatus", pod.Namespace, pod.Name)
}
podStatusCopy := pod.Status.DeepCopy()
podStatusCopy.ExtendedResourceClaimStatus = &v1.PodExtendedResourceClaimStatus{
RequestMappings: cer,

View file

@ -90,6 +90,7 @@ var (
namespace = "default"
attrName = resourceapi.QualifiedName("healthy") // device attribute only available on non-default node
extendedResourceName = "example.com/gpu"
extendedResourceName2 = "example.com/gpu2"
implicitExtendedResourceName = "deviceclass.resource.kubernetes.io/my-resource-class"
deviceClass = &resourceapi.DeviceClass{
@ -105,7 +106,14 @@ var (
ExtendedResourceName: &extendedResourceName,
},
}
deviceClassWithExtendResourceName2 = &resourceapi.DeviceClass{
ObjectMeta: metav1.ObjectMeta{
Name: className + "2",
},
Spec: resourceapi.DeviceClassSpec{
ExtendedResourceName: &extendedResourceName2,
},
}
podWithClaimName = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
PodResourceClaims(v1.PodResourceClaim{Name: resourceName, ResourceClaimName: &claimName}).
@ -140,6 +148,13 @@ var (
v1.ResourceName(extendedResourceName): "1",
}).
Obj()
podWithExtendedResourceName2 = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Req(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName): "1",
v1.ResourceName(extendedResourceName2): "1",
}).
Obj()
podWithImplicitExtendedResourceName = st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Req(map[v1.ResourceName]string{
@ -291,6 +306,19 @@ var (
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
extendedResourceAllocationResult2 = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{{
Driver: driver,
Pool: nodeName,
Device: "instance-1",
Request: "container-0-request-1",
}},
},
NodeSelector: func() *v1.NodeSelector {
return st.MakeNodeSelector().In("metadata.name", []string{nodeName}, st.NodeSelectorTypeMatchFields).Obj()
}(),
}
implicitExtendedResourceAllocationResult = &resourceapi.AllocationResult{
Devices: resourceapi.DeviceAllocationResult{
Results: []resourceapi.DeviceRequestAllocationResult{
@ -513,6 +541,22 @@ var (
RequestWithName("container-0-request-0", className).
Allocation(extendedResourceAllocationResult).
Obj()
extendedResourceClaim2 = st.MakeResourceClaim().
Name("my-pod-extended-resources-0").
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-1", className+"2").
Allocation(extendedResourceAllocationResult2).
Obj()
extendedResourceClaimNoName = st.MakeResourceClaim().
Name(specialClaimInMemName).
GenerateName("my-pod-extended-resources-").
@ -529,6 +573,22 @@ var (
RequestWithName("container-0-request-0", className).
Allocation(extendedResourceAllocationResult).
Obj()
extendedResourceClaimNoName2 = st.MakeResourceClaim().
Name(specialClaimInMemName).
GenerateName("my-pod-extended-resources-").
Namespace(namespace).
Annotations(map[string]string{"resource.kubernetes.io/extended-resource-claim": "true"}).
OwnerRef(
metav1.OwnerReference{
APIVersion: "v1",
Kind: "Pod",
Name: podName,
UID: types.UID(podUID),
Controller: ptr.To(true),
}).
RequestWithName("container-0-request-1", className+"2").
Allocation(extendedResourceAllocationResult2).
Obj()
implicitExtendedResourceClaim = st.MakeResourceClaim().
Name("my-pod-extended-resources-0").
GenerateName("my-pod-extended-resources-").
@ -1581,6 +1641,27 @@ func TestPlugin(t *testing.T) {
require.ErrorContains(t, err, "not found")
},
},
"extended-resource-one-device-plugin-one-dra": {
enableDRAExtendedResource: true,
enableDRADeviceBindingConditions: true,
enableDRAResourceClaimDeviceStatus: true,
nodes: []*v1.Node{workerNodeWithExtendedResource},
pod: podWithExtendedResourceName2,
classes: []*resourceapi.DeviceClass{deviceClassWithExtendResourceName, deviceClassWithExtendResourceName2},
objs: []apiruntime.Object{workerNodeSlice, podWithExtendedResourceName2},
want: want{
reserve: result{
inFlightClaims: []metav1.Object{extendedResourceClaimNoName2},
},
prebind: result{
assumedClaim: reserve(extendedResourceClaim2, podWithExtendedResourceName2),
added: []metav1.Object{reserve(extendedResourceClaim2, podWithExtendedResourceName2)},
},
postbind: result{
assumedClaim: reserve(extendedResourceClaim2, podWithExtendedResourceName2),
},
},
},
"extended-resource-name-with-zero-allocatable": {
enableDRAExtendedResource: true,
nodes: []*v1.Node{workerNodeWithExtendedResourceZeroAllocatable},
@ -3158,7 +3239,7 @@ func (m *mockDeviceClassResolver) GetDeviceClass(resourceName v1.ResourceName) *
return m.mapping[resourceName]
}
func Test_createDeviceRequests(t *testing.T) {
func Test_createRequestsAndMappings_requests(t *testing.T) {
pod1 := st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Res(map[v1.ResourceName]string{
@ -3185,6 +3266,33 @@ func Test_createDeviceRequests(t *testing.T) {
v1.ResourceName(extendedResourceName + "init"): "2",
}).
Obj()
podInit2 := st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Res(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "2",
}).
Obj()
podInit3 := st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Res(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "1",
}).
SidecarReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "2",
}).
Obj()
res := map[v1.ResourceName]int64{
v1.ResourceName(extendedResourceName): 1,
@ -3260,7 +3368,23 @@ func Test_createDeviceRequests(t *testing.T) {
Count: 1,
},
}
devReqSidecar := resourceapi.DeviceRequest{
Name: "container-1-request-0",
Exactly: &resourceapi.ExactDeviceRequest{
DeviceClassName: "classInit",
AllocationMode: resourceapi.DeviceAllocationModeExactCount,
Count: 1,
},
}
devReq2Init := resourceapi.DeviceRequest{
Name: "container-1-request-0",
Exactly: &resourceapi.ExactDeviceRequest{
DeviceClassName: "classInit",
AllocationMode: resourceapi.DeviceAllocationModeExactCount,
Count: 2,
},
}
devReq6Init := resourceapi.DeviceRequest{
Name: "container-0-request-0",
Exactly: &resourceapi.ExactDeviceRequest{
DeviceClassName: "classInit",
@ -3268,6 +3392,30 @@ func Test_createDeviceRequests(t *testing.T) {
Count: 2,
},
}
devReq3Init := resourceapi.DeviceRequest{
Name: "container-2-request-0",
Exactly: &resourceapi.ExactDeviceRequest{
DeviceClassName: "class",
AllocationMode: resourceapi.DeviceAllocationModeExactCount,
Count: 1,
},
}
devReq4Init := resourceapi.DeviceRequest{
Name: "container-3-request-0",
Exactly: &resourceapi.ExactDeviceRequest{
DeviceClassName: "class",
AllocationMode: resourceapi.DeviceAllocationModeExactCount,
Count: 1,
},
}
devReq5Init := resourceapi.DeviceRequest{
Name: "container-2-request-0",
Exactly: &resourceapi.ExactDeviceRequest{
DeviceClassName: "classInit",
AllocationMode: resourceapi.DeviceAllocationModeExactCount,
Count: 2,
},
}
testcases := map[string]struct {
pod *v1.Pod
@ -3307,36 +3455,49 @@ func Test_createDeviceRequests(t *testing.T) {
pod: podInit,
extendedResources: resInit,
cache: &mockDeviceClassResolver{mapping: devMapInit},
wantDeviceRequests: []resourceapi.DeviceRequest{devReq2Init, devReqInit},
wantDeviceRequests: []resourceapi.DeviceRequest{devReq6Init, devReqInit},
},
"two init containers, one regular container": {
pod: podInit2,
extendedResources: resInit,
cache: &mockDeviceClassResolver{mapping: devMapInit},
wantDeviceRequests: []resourceapi.DeviceRequest{devReq2Init, devReq3Init},
},
"three init containers, one sidecar, one regular container": {
pod: podInit3,
extendedResources: resInit,
cache: &mockDeviceClassResolver{mapping: devMapInit},
wantDeviceRequests: []resourceapi.DeviceRequest{devReqSidecar, devReq5Init, devReq4Init},
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
gotDeviceRequests := createDeviceRequests(tc.pod, tc.extendedResources, tc.cache)
logger, _ := ktesting.NewTestContext(t)
gotDeviceRequests, _ := createRequestsAndMappings(tc.pod, tc.extendedResources, logger, tc.cache)
if len(tc.wantDeviceRequests) != len(gotDeviceRequests) {
t.Fatalf("different length, want %#v, got %#v", tc.wantDeviceRequests, gotDeviceRequests)
t.Fatalf("different length, want %#v, len=%v, got %#v, len=%v", tc.wantDeviceRequests, len(tc.wantDeviceRequests), gotDeviceRequests, len(gotDeviceRequests))
}
sort.Slice(gotDeviceRequests, func(i, j int) bool { return gotDeviceRequests[i].Name < gotDeviceRequests[j].Name })
for i, r := range tc.wantDeviceRequests {
if r.Name != gotDeviceRequests[i].Name {
t.Fatalf("different name, want %#v, got %#v", r, gotDeviceRequests[i])
t.Errorf("different name, want %#v, got %#v", r, gotDeviceRequests[i])
}
if r.Exactly.DeviceClassName != gotDeviceRequests[i].Exactly.DeviceClassName {
t.Fatalf("different deviceClassName, want %#v, got %#v", r, gotDeviceRequests[i])
t.Errorf("different deviceClassName, want %#v, got %#v", r, gotDeviceRequests[i])
}
if r.Exactly.AllocationMode != gotDeviceRequests[i].Exactly.AllocationMode {
t.Fatalf("different allocationMode, want %#v, got %#v", r, gotDeviceRequests[i])
t.Errorf("different allocationMode, want %#v, got %#v", r, gotDeviceRequests[i])
}
if r.Exactly.Count != gotDeviceRequests[i].Exactly.Count {
t.Fatalf("different count, want %#v, got %#v", r, gotDeviceRequests[i])
t.Errorf("different count, want %#v, got %#v", r.Exactly.Count, gotDeviceRequests[i].Exactly.Count)
}
}
})
}
}
func Test_createRequestMappings(t *testing.T) {
func Test_createRequestsAndMappings_mappings(t *testing.T) {
pod1 := st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Res(map[v1.ResourceName]string{
@ -3344,6 +3505,13 @@ func Test_createRequestMappings(t *testing.T) {
v1.ResourceName(extendedResourceName + "1"): "2",
}).
Obj()
pod1InitImplicit := st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "1",
v1.ResourceName(resourceapi.ResourceDeviceClassPrefix + "classInit"): "2",
}).
Obj()
pod2 := st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Res(map[v1.ResourceName]string{
@ -3363,24 +3531,97 @@ func Test_createRequestMappings(t *testing.T) {
v1.ResourceName(extendedResourceName + "init"): "2",
}).
Obj()
claim := st.MakeResourceClaim().
Name(claimName).
Namespace(namespace).
RequestWithName("container-0-request-0", className).
podInit2 := st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Res(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "2",
}).
Obj()
claim2 := st.MakeResourceClaim().
Name(claimName).
Namespace(namespace).
RequestWithName("container-0-request-0", className).
RequestWithName("container-1-request-0", className).
podInitImplicit := st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Res(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(resourceapi.ResourceDeviceClassPrefix + "classInit"): "2",
}).
Obj()
podInit3 := st.MakePod().Name(podName).Namespace(namespace).
UID(podUID).
Res(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "1",
}).
SidecarReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "1",
}).
InitReq(map[v1.ResourceName]string{
v1.ResourceName(extendedResourceName + "init"): "2",
}).
Obj()
res := map[v1.ResourceName]int64{
v1.ResourceName(extendedResourceName): 1,
v1.ResourceName(extendedResourceName + "1"): 2,
}
resInit := map[v1.ResourceName]int64{
v1.ResourceName(extendedResourceName): 1,
v1.ResourceName(extendedResourceName + "init"): 2,
}
resInitImplicit := map[v1.ResourceName]int64{
v1.ResourceName(extendedResourceName): 1,
v1.ResourceName(extendedResourceName + "init"): 2,
v1.ResourceName(resourceapi.ResourceDeviceClassPrefix + "classInit"): 2,
}
devMap := map[v1.ResourceName]*resourceapi.DeviceClass{
v1.ResourceName(extendedResourceName): {
ObjectMeta: metav1.ObjectMeta{
Name: "class",
},
},
v1.ResourceName(extendedResourceName + "1"): {
ObjectMeta: metav1.ObjectMeta{
Name: "class1",
},
},
}
devMapInit := map[v1.ResourceName]*resourceapi.DeviceClass{
v1.ResourceName(extendedResourceName): {
ObjectMeta: metav1.ObjectMeta{
Name: "class",
},
},
v1.ResourceName(extendedResourceName + "init"): {
ObjectMeta: metav1.ObjectMeta{
Name: "classInit",
},
},
v1.ResourceName(resourceapi.ResourceDeviceClassPrefix + "classInit"): {
ObjectMeta: metav1.ObjectMeta{
Name: "classInit",
},
},
}
cer := v1.ContainerExtendedResourceRequest{
ContainerName: "con0",
ResourceName: extendedResourceName,
RequestName: "container-0-request-0",
}
cer1 := v1.ContainerExtendedResourceRequest{
ContainerName: "con0",
ResourceName: extendedResourceName + "1",
RequestName: "container-0-request-1",
}
cer2 := v1.ContainerExtendedResourceRequest{
ContainerName: "con1",
ResourceName: extendedResourceName + "1",
@ -3391,60 +3632,143 @@ func Test_createRequestMappings(t *testing.T) {
ResourceName: extendedResourceName,
RequestName: "container-1-request-0",
}
cer4 := v1.ContainerExtendedResourceRequest{
ContainerName: "con0",
ResourceName: extendedResourceName,
RequestName: "container-2-request-0",
}
cer5 := v1.ContainerExtendedResourceRequest{
ContainerName: "con0",
ResourceName: extendedResourceName,
RequestName: "container-3-request-0",
}
cerInit := v1.ContainerExtendedResourceRequest{
ContainerName: "init-con0",
ResourceName: extendedResourceName + "init",
RequestName: "container-1-request-0",
}
cerInit0 := v1.ContainerExtendedResourceRequest{
ContainerName: "init-con0",
ResourceName: extendedResourceName + "init",
RequestName: "container-0-request-0",
}
cerInit1 := v1.ContainerExtendedResourceRequest{
ContainerName: "init-con0",
ResourceName: extendedResourceName + "init",
RequestName: "container-1-request-0",
}
cerInit2 := v1.ContainerExtendedResourceRequest{
ContainerName: "init-con1",
ResourceName: extendedResourceName + "init",
RequestName: "container-1-request-0",
}
cerInit3 := v1.ContainerExtendedResourceRequest{
ContainerName: "init-con2",
ResourceName: extendedResourceName + "init",
RequestName: "container-2-request-0",
}
cerSidecar := v1.ContainerExtendedResourceRequest{
ContainerName: "sidecar-con1",
ResourceName: extendedResourceName + "init",
RequestName: "container-1-request-0",
}
cerInitImplicit := v1.ContainerExtendedResourceRequest{
ContainerName: "init-con0",
ResourceName: extendedResourceName + "init",
RequestName: "container-0-request-0",
}
cerInit4Implicit := v1.ContainerExtendedResourceRequest{
ContainerName: "init-con0",
ResourceName: extendedResourceName + "init",
RequestName: "container-0-request-1",
}
cerInit2Implicit := v1.ContainerExtendedResourceRequest{
ContainerName: "init-con1",
ResourceName: resourceapi.ResourceDeviceClassPrefix + "classInit",
RequestName: "container-1-request-0",
}
cerInit3Implicit := v1.ContainerExtendedResourceRequest{
ContainerName: "init-con0",
ResourceName: resourceapi.ResourceDeviceClassPrefix + "classInit",
RequestName: "container-0-request-0",
}
testcases := map[string]struct {
claim *resourceapi.ResourceClaim
pod *v1.Pod
wantReqMappings []v1.ContainerExtendedResourceRequest
pod *v1.Pod
extnededResources map[v1.ResourceName]int64
deviceClassMapping fwk.DeviceClassResolver
wantReqMappings []v1.ContainerExtendedResourceRequest
}{
"one container, one request": {
claim: claim,
pod: pod1,
wantReqMappings: []v1.ContainerExtendedResourceRequest{cer},
"one container, two requests": {
pod: pod1,
extnededResources: res,
deviceClassMapping: &mockDeviceClassResolver{devMap},
wantReqMappings: []v1.ContainerExtendedResourceRequest{cer, cer1},
},
"two containers, one request": {
claim: claim,
pod: pod2,
wantReqMappings: []v1.ContainerExtendedResourceRequest{cer},
},
"one init container, one regular container, one request": {
claim: claim,
pod: podInit,
wantReqMappings: []v1.ContainerExtendedResourceRequest{cerInit},
"one container, one explicit and one implicit request": {
pod: pod1InitImplicit,
extnededResources: resInitImplicit,
deviceClassMapping: &mockDeviceClassResolver{devMapInit},
wantReqMappings: []v1.ContainerExtendedResourceRequest{cerInit3Implicit, cerInit4Implicit},
},
"two containers, two requests": {
claim: claim2,
pod: pod2,
wantReqMappings: []v1.ContainerExtendedResourceRequest{cer, cer2},
pod: pod2,
extnededResources: res,
deviceClassMapping: &mockDeviceClassResolver{devMap},
wantReqMappings: []v1.ContainerExtendedResourceRequest{cer, cer2},
},
"two containers (one is init container), two requests": {
claim: claim2,
pod: podInit,
wantReqMappings: []v1.ContainerExtendedResourceRequest{cerInit, cer3},
"one init container, one regular container, one request": {
pod: podInit,
extnededResources: resInit,
deviceClassMapping: &mockDeviceClassResolver{devMapInit},
wantReqMappings: []v1.ContainerExtendedResourceRequest{cerInit0, cer3},
},
"three containers (two are init container), two requests": {
pod: podInit2,
extnededResources: resInit,
deviceClassMapping: &mockDeviceClassResolver{devMapInit},
wantReqMappings: []v1.ContainerExtendedResourceRequest{cerInit, cerInit2, cer4},
},
"three containers (two are init container), both explicit and implicit resources": {
pod: podInitImplicit,
extnededResources: resInitImplicit,
deviceClassMapping: &mockDeviceClassResolver{devMapInit},
wantReqMappings: []v1.ContainerExtendedResourceRequest{cerInitImplicit, cerInit2Implicit, cer4},
},
"four containers (two are init container, one sidecar), three requests": {
pod: podInit3,
extnededResources: resInit,
deviceClassMapping: &mockDeviceClassResolver{devMapInit},
wantReqMappings: []v1.ContainerExtendedResourceRequest{cerInit1, cerSidecar, cerInit3, cer5},
},
}
for name, tc := range testcases {
t.Run(name, func(t *testing.T) {
gotReqMappings := createRequestMappings(tc.claim, tc.pod)
logger, _ := ktesting.NewTestContext(t)
_, gotReqMappings := createRequestsAndMappings(tc.pod, tc.extnededResources, logger, tc.deviceClassMapping)
if len(tc.wantReqMappings) != len(gotReqMappings) {
t.Fatalf("different length, want %#v, got %#v", tc.wantReqMappings, gotReqMappings)
}
sort.Slice(gotReqMappings, func(i, j int) bool { return gotReqMappings[i].RequestName < gotReqMappings[j].RequestName })
sort.Slice(gotReqMappings, func(i, j int) bool {
if gotReqMappings[i].RequestName < gotReqMappings[j].RequestName {
return true
}
if gotReqMappings[i].RequestName > gotReqMappings[j].RequestName {
return false
}
return gotReqMappings[i].ContainerName < gotReqMappings[j].ContainerName
})
for i, r := range tc.wantReqMappings {
if r.RequestName != gotReqMappings[i].RequestName {
t.Fatalf("different request name, want %#v, got %#v", r, gotReqMappings[i])
t.Errorf("different request name, want %#v, got %#v", r, gotReqMappings[i])
}
if r.ContainerName != gotReqMappings[i].ContainerName {
t.Fatalf("different container name, want %#v, got %#v", r, gotReqMappings[i])
t.Errorf("different container name, want %#v, got %#v", r, gotReqMappings[i])
}
if r.ResourceName != gotReqMappings[i].ResourceName {
t.Fatalf("different resource name, want %#v, got %#v", r, gotReqMappings[i])
t.Errorf("different resource name, want %#v, got %#v", r, gotReqMappings[i])
}
}
})

View file

@ -2086,11 +2086,30 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
extendedResourceTest := func(ctx context.Context, b *drautils.Builder, f *framework.Framework, resourceNames []string, containerEnv []string) {
pod := b.Pod()
res := v1.ResourceList{}
res2 := v1.ResourceList{}
for _, resourceName := range resourceNames {
res[v1.ResourceName(resourceName)] = resource.MustParse("1")
res2[v1.ResourceName(resourceName)] = resource.MustParse("2")
}
pod.Spec.Containers[0].Resources.Requests = res
pod.Spec.Containers[0].Resources.Limits = res
pod.Spec.InitContainers = []v1.Container{pod.Spec.Containers[0], pod.Spec.Containers[0], pod.Spec.Containers[0]}
pod.Spec.InitContainers[0].Name += "-init"
// This must succeed for the pod to start.
pod.Spec.InitContainers[0].Command = []string{"sh", "-c", "env|grep container_1_request_0=true"}
pod.Spec.InitContainers[0].Resources.Requests = res2
pod.Spec.InitContainers[0].Resources.Limits = res2
pod.Spec.InitContainers[1].Name += "-sidecar"
// This must succeed for the pod to start.
pod.Spec.InitContainers[1].Command = []string{"sh", "-c", "while true; do env; env|grep container_1_request_0=true; echo $?; sleep 5; done"}
pod.Spec.InitContainers[1].RestartPolicy = ptr.To(v1.ContainerRestartPolicyAlways)
pod.Spec.InitContainers[1].Resources.Requests = res
pod.Spec.InitContainers[1].Resources.Limits = res
pod.Spec.InitContainers[2].Name += "-init-1"
// This must succeed for the pod to start.
pod.Spec.InitContainers[2].Command = []string{"sh", "-c", "env|grep container_3_request_0=true"}
pod.Spec.InitContainers[2].Resources.Requests = res
pod.Spec.InitContainers[2].Resources.Limits = res
b.Create(ctx, pod)
err := e2epod.WaitForPodRunningInNamespace(ctx, f.ClientSet, pod)
@ -2153,8 +2172,8 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
// b.ExtendedResourceName(0) is added to the device class with name: b.ClassName()+"0"
b.ExtendedResourceName(0),
}, []string{
"container_0_request_0", "true",
"container_0_request_1", "true",
"container_3_request_0", "true",
"container_3_request_1", "true",
})
})
@ -2162,7 +2181,7 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
extendedResourceTest(ctx, b, f, []string{
b.ExtendedResourceName(0),
}, []string{
"container_0_request_0", "true",
"container_3_request_0", "true",
})
})
@ -2179,9 +2198,9 @@ var _ = framework.SIGDescribe("node")(framework.WithLabel("DRA"), func() {
b.ExtendedResourceName(1),
b.ExtendedResourceName(2),
}, []string{
"container_0_request_0", "true",
"container_0_request_1", "true",
"container_0_request_2", "true",
"container_3_request_0", "true",
"container_3_request_1", "true",
"container_3_request_2", "true",
})
})
ginkgo.It("must run a pod with extended resource with three containers one resource each", func(ctx context.Context) {