feat(dra): support dra device classes (#1759)

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

* feat(dra): support dra device classes

Signed-off-by: Hristo Hristov <me@hhristov.info>

---------

Signed-off-by: Hristo Hristov <me@hhristov.info>
This commit is contained in:
Hristo Hristov
2025-12-04 09:57:45 +02:00
committed by GitHub
parent 2f9e6c15e8
commit dd39e1a6d5
19 changed files with 1037 additions and 1 deletions

View File

@@ -70,6 +70,8 @@ type TenantAvailableClassesStatus struct {
RuntimeClasses []string `json:"runtime,omitempty"`
// Available GatewayClasses
GatewayClasses []string `json:"gateway,omitempty"`
// Available DeviceClasses
DeviceClasses []string `json:"device,omitempty"`
}
func (ms *TenantStatus) GetInstance(stat *TenantStatusNamespaceItem) *TenantStatusNamespaceItem {

View File

@@ -58,6 +58,8 @@ type TenantSpec struct {
// A default value can be specified, and all the Pod resources created will inherit the declared class.
// Optional.
PriorityClasses *api.DefaultAllowedListSpec `json:"priorityClasses,omitempty"`
// Specifies options for the DeviceClass resources.
DeviceClasses *api.SelectorAllowedListSpec `json:"deviceClasses,omitempty"`
// Specifies options for the GatewayClass resources.
GatewayOptions GatewayOptions `json:"gatewayOptions,omitempty"`
// Toggling the Tenant resources cordoning, when enable resources cannot be deleted.

View File

@@ -949,6 +949,11 @@ func (in *TenantAvailableClassesStatus) DeepCopyInto(out *TenantAvailableClasses
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.DeviceClasses != nil {
in, out := &in.DeviceClasses, &out.DeviceClasses
*out = make([]string, len(*in))
copy(*out, *in)
}
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TenantAvailableClassesStatus.
@@ -1275,6 +1280,11 @@ func (in *TenantSpec) DeepCopyInto(out *TenantSpec) {
*out = new(api.DefaultAllowedListSpec)
(*in).DeepCopyInto(*out)
}
if in.DeviceClasses != nil {
in, out := &in.DeviceClasses, &out.DeviceClasses
*out = new(api.SelectorAllowedListSpec)
(*in).DeepCopyInto(*out)
}
in.GatewayOptions.DeepCopyInto(&out.GatewayOptions)
if in.ForceTenantPrefix != nil {
in, out := &in.ForceTenantPrefix, &out.ForceTenantPrefix

View File

@@ -182,6 +182,13 @@ The following Values have changed key or Value:
| webhooks.hooks.defaults.ingress | object | `{}` | Deprecated, use webhooks.hooks.ingresses instead |
| webhooks.hooks.defaults.pods | object | `{}` | Deprecated, use webhooks.hooks.pods instead |
| webhooks.hooks.defaults.pvc | object | `{}` | Deprecated, use webhooks.hooks.persistentvolumeclaims instead |
| webhooks.hooks.devices.enabled | bool | `true` | Enable the Hook |
| webhooks.hooks.devices.failurePolicy | string | `"Fail"` | [FailurePolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy) |
| webhooks.hooks.devices.matchConditions | list | `[]` | [MatchConditions](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchconditions) |
| webhooks.hooks.devices.matchPolicy | string | `"Equivalent"` | [MatchPolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchpolicy) |
| webhooks.hooks.devices.namespaceSelector | object | `{"matchExpressions":[{"key":"capsule.clastix.io/tenant","operator":"Exists"}]}` | [NamespaceSelector](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-namespaceselector) |
| webhooks.hooks.devices.objectSelector | object | `{}` | [ObjectSelector](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-objectselector) |
| webhooks.hooks.devices.reinvocationPolicy | string | `"Never"` | [ReinvocationPolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#reinvocation-policy) |
| webhooks.hooks.gateways.enabled | bool | `true` | Enable the Hook |
| webhooks.hooks.gateways.failurePolicy | string | `"Fail"` | [FailurePolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy) |
| webhooks.hooks.gateways.matchConditions | list | `[]` | [MatchConditions](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchpolicy) |

View File

@@ -1193,6 +1193,61 @@ spec:
description: Toggling the Tenant resources cordoning, when enable
resources cannot be deleted.
type: boolean
deviceClasses:
description: Specifies options for the DeviceClass resources.
properties:
allowed:
description: Match exact elements which are allowed as class names
within this tenant
items:
type: string
type: array
allowedRegex:
description: Match elements by regex (DEPRECATED)
type: string
matchExpressions:
description: matchExpressions is a list of label selector requirements.
The requirements are ANDed.
items:
description: |-
A label selector requirement is a selector that contains values, a key, and an operator that
relates the key and values.
properties:
key:
description: key is the label key that the selector applies
to.
type: string
operator:
description: |-
operator represents a key's relationship to a set of values.
Valid operators are In, NotIn, Exists and DoesNotExist.
type: string
values:
description: |-
values is an array of string values. If the operator is In or NotIn,
the values array must be non-empty. If the operator is Exists or DoesNotExist,
the values array must be empty. This array is replaced during a strategic
merge patch.
items:
type: string
type: array
x-kubernetes-list-type: atomic
required:
- key
- operator
type: object
type: array
x-kubernetes-list-type: atomic
matchLabels:
additionalProperties:
type: string
description: |-
matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels
map is equivalent to an element of matchExpressions, whose key field is "key", the
operator is "In", and the values array contains only "value". The requirements are ANDed.
type: object
type: object
x-kubernetes-map-type: atomic
forceTenantPrefix:
description: |-
Use this if you want to disable/enable the Tenant name prefix to specific Tenants, overriding global forceTenantPrefix in CapsuleConfiguration.
@@ -2571,6 +2626,11 @@ spec:
classes:
description: Available Class Types within Tenant
properties:
device:
description: Available DeviceClasses
items:
type: string
type: array
gateway:
description: Available GatewayClasses
items:

View File

@@ -44,6 +44,44 @@ webhooks:
timeoutSeconds: {{ $.Values.webhooks.validatingWebhooksTimeoutSeconds }}
{{- end }}
{{- end }}
{{- with .Values.webhooks.hooks.devices }}
{{- if .enabled }}
- name: devices.projectcapsule.dev
admissionReviewVersions:
- v1
- v1beta1
clientConfig:
{{- include "capsule.webhooks.service" (dict "path" "/devices" "ctx" $) | nindent 4 }}
failurePolicy: {{ .failurePolicy }}
matchPolicy: {{ .matchPolicy }}
{{- with .namespaceSelector }}
namespaceSelector:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with .objectSelector }}
objectSelector:
{{- toYaml . | nindent 4 }}
{{- end }}
{{- with .matchConditions }}
matchConditions:
{{- toYaml . | nindent 4 }}
{{- end }}
rules:
- apiGroups:
- resource.k8s.io
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- resourceclaimtemplates
- resourceclaims
scope: Namespaced
sideEffects: None
timeoutSeconds: {{ $.Values.webhooks.validatingWebhooksTimeoutSeconds }}
{{- end }}
{{- end }}
{{- with .Values.webhooks.hooks.gateways }}
{{- if .enabled }}
- name: gateway.projectcapsule.dev

View File

@@ -882,6 +882,55 @@
}
}
},
"devices": {
"type": "object",
"properties": {
"enabled": {
"description": "Enable the Hook",
"type": "boolean"
},
"failurePolicy": {
"description": "[FailurePolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy)",
"type": "string"
},
"matchConditions": {
"description": "[MatchConditions](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchconditions)",
"type": "array"
},
"matchPolicy": {
"description": "[MatchPolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchpolicy)",
"type": "string"
},
"namespaceSelector": {
"description": "[NamespaceSelector](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-namespaceselector)",
"type": "object",
"properties": {
"matchExpressions": {
"type": "array",
"items": {
"type": "object",
"properties": {
"key": {
"type": "string"
},
"operator": {
"type": "string"
}
}
}
}
}
},
"objectSelector": {
"description": "[ObjectSelector](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-objectselector)",
"type": "object"
},
"reinvocationPolicy": {
"description": "[ReinvocationPolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#reinvocation-policy)",
"type": "string"
}
}
},
"gateways": {
"type": "object",
"properties": {

View File

@@ -565,7 +565,24 @@ webhooks:
matchConditions: []
# -- [ReinvocationPolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#reinvocation-policy)
reinvocationPolicy: Never
devices:
# -- Enable the Hook
enabled: true
# -- [FailurePolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#failure-policy)
failurePolicy: Fail
# -- [MatchPolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchpolicy)
matchPolicy: Equivalent
# -- [ObjectSelector](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-objectselector)
objectSelector: {}
# -- [NamespaceSelector](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-namespaceselector)
namespaceSelector:
matchExpressions:
- key: capsule.clastix.io/tenant
operator: Exists
# -- [MatchConditions](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#matching-requests-matchconditions)
matchConditions: []
# -- [ReinvocationPolicy](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/#reinvocation-policy)
reinvocationPolicy: Never
networkpolicies:
# -- Enable the Hook
enabled: true

View File

@@ -44,6 +44,7 @@ import (
"github.com/projectcapsule/capsule/internal/metrics"
"github.com/projectcapsule/capsule/internal/webhook"
"github.com/projectcapsule/capsule/internal/webhook/defaults"
"github.com/projectcapsule/capsule/internal/webhook/dra"
"github.com/projectcapsule/capsule/internal/webhook/gateway"
"github.com/projectcapsule/capsule/internal/webhook/ingress"
"github.com/projectcapsule/capsule/internal/webhook/misc"
@@ -267,6 +268,7 @@ func main() {
),
route.CustomResources(tenantvalidation.ResourceCounterHandler(manager.GetClient())),
route.Gateway(gateway.Class(cfg)),
route.DeviceClass(dra.DeviceClass()),
route.Defaults(defaults.Handler(cfg, kubeVersion)),
route.TenantMutation(
tenantmutation.MetaHandler(),

535
e2e/device_class_test.go Normal file
View File

@@ -0,0 +1,535 @@
// Copyright 2020-2023 Project Capsule Authors.
// SPDX-License-Identifier: Apache-2.0
package e2e
import (
"context"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
capsulev1beta2 "github.com/projectcapsule/capsule/api/v1beta2"
"github.com/projectcapsule/capsule/pkg/api"
resources "k8s.io/api/resource/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
)
// End-to-end suite for Tenant DeviceClass restrictions. It creates three
// DeviceClasses (two labelled env=authorized, one env=unauthorized) and two
// Tenants whose spec.deviceClasses select on the "env" label, then checks the
// Tenant status and the admission outcome for ResourceClaims and
// ResourceClaimTemplates created in a Tenant namespace.
var _ = Describe("when Tenant handles Device classes", Label("tenant", "classes", "device"), func() {
	const (
		// CEL selector shared by every DeviceClass and device request in this suite.
		celExpression = "device.driver == 'gpu.example.com' && device.attributes['gpu.example.com'].type == 'gpu'"
		// Suffix appended to a DeviceClass name to derive the claim/template name.
		claimSuffix = "-resource-claim"
	)

	// request describes one device request: its name and the DeviceClass it asks for.
	type request struct{ name, class string }

	erm := "nvidia.com/gpu"

	// newDeviceClass builds a DeviceClass carrying the label env=<env>.
	newDeviceClass := func(name, env string) *resources.DeviceClass {
		return &resources.DeviceClass{
			ObjectMeta: metav1.ObjectMeta{
				Name:   name,
				Labels: map[string]string{"env": env},
			},
			Spec: resources.DeviceClassSpec{
				Selectors: []resources.DeviceSelector{
					{CEL: &resources.CELDeviceSelector{Expression: celExpression}},
				},
				ExtendedResourceName: &erm,
			},
		}
	}

	// newTenant builds a Tenant with a single owner whose allowed DeviceClasses
	// are those labelled env=<env>.
	newTenant := func(name string, owner api.UserSpec, env string) *capsulev1beta2.Tenant {
		return &capsulev1beta2.Tenant{
			ObjectMeta: metav1.ObjectMeta{Name: name},
			Spec: capsulev1beta2.TenantSpec{
				Owners: []api.OwnerSpec{
					{CoreOwnerSpec: api.CoreOwnerSpec{UserSpec: owner}},
				},
				DeviceClasses: &api.SelectorAllowedListSpec{
					LabelSelector: v1.LabelSelector{
						MatchLabels: map[string]string{"env": env},
					},
				},
			},
		}
	}

	authorized := newDeviceClass("gpu.example.com", "authorized")
	authorized2 := newDeviceClass("gpu2.example.com", "authorized")
	unauthorized := newDeviceClass("gpu3.example.com", "unauthorized")

	tntWithAuthorized := newTenant(
		"e2e-authorized-deviceclass",
		api.UserSpec{Name: "authorized-deviceclass", Kind: "User"},
		"authorized",
	)
	tntWithUnauthorized := newTenant(
		"e2e-unauthorized-deviceclass",
		api.UserSpec{Name: "unauthorized-deviceclass", Kind: "User"},
		"production",
	)

	// Request fixtures reused across the scenarios below.
	var (
		reqAuthorized   = request{name: "authorized-device-class-resource-claim", class: "gpu.example.com"}
		reqUnauthorized = request{name: "unauthorized-device-class-resource-claim", class: "gpu3.example.com"}
		// Keeps the request name used by the original scenario although the class is gpu2.example.com.
		reqSecondAuthorized = request{name: "unauthorized-device-class-resource-claim", class: "gpu2.example.com"}
	)

	// claimSpec builds a fresh ResourceClaimSpec on every call, so each poll
	// submits an object that shares no state with a previous attempt.
	claimSpec := func(wanted []request) resources.ResourceClaimSpec {
		deviceRequests := make([]resources.DeviceRequest, 0, len(wanted))
		for _, w := range wanted {
			deviceRequests = append(deviceRequests, resources.DeviceRequest{
				Name: w.name,
				Exactly: &resources.ExactDeviceRequest{
					DeviceClassName: w.class,
					Selectors: []resources.DeviceSelector{
						{CEL: &resources.CELDeviceSelector{Expression: celExpression}},
					},
				},
			})
		}

		return resources.ResourceClaimSpec{
			Devices: resources.DeviceClaim{Requests: deviceRequests},
		}
	}

	// createClaim returns a poll function creating a ResourceClaim named after class.
	createClaim := func(namespace string, class *resources.DeviceClass, wanted ...request) func() error {
		return func() error {
			return k8sClient.Create(context.TODO(), &resources.ResourceClaim{
				ObjectMeta: metav1.ObjectMeta{
					Name:      class.GetName() + claimSuffix,
					Namespace: namespace,
				},
				Spec: claimSpec(wanted),
			})
		}
	}

	// createTemplate returns a poll function creating a ResourceClaimTemplate named after class.
	createTemplate := func(namespace string, class *resources.DeviceClass, wanted ...request) func() error {
		return func() error {
			return k8sClient.Create(context.TODO(), &resources.ResourceClaimTemplate{
				ObjectMeta: metav1.ObjectMeta{
					Name:      class.GetName() + claimSuffix,
					Namespace: namespace,
				},
				Spec: resources.ResourceClaimTemplateSpec{
					Spec: claimSpec(wanted),
				},
			})
		}
	}

	// availableDeviceClasses reads status.classes.device of the authorized Tenant.
	availableDeviceClasses := func() ([]string, error) {
		current := &capsulev1beta2.Tenant{}
		key := types.NamespacedName{Name: tntWithAuthorized.GetName()}

		if err := k8sClient.Get(context.TODO(), key, current); err != nil {
			return nil, err
		}

		return current.Status.Classes.DeviceClasses, nil
	}

	// tenantNamespace creates a namespace as the authorized Tenant's owner,
	// waits for it to be listed on the Tenant, and returns its name.
	tenantNamespace := func() string {
		ns := NewNamespace("")
		NamespaceCreation(ns, tntWithAuthorized.Spec.Owners[0].UserSpec, defaultTimeoutInterval).Should(Succeed())
		TenantNamespaceList(tntWithAuthorized, defaultTimeoutInterval).Should(ContainElement(ns.GetName()))

		return ns.GetName()
	}

	// recreate clears the resourceVersion left by a previous spec and creates obj.
	recreate := func(obj client.Object) {
		obj.SetResourceVersion("")
		EventuallyCreation(func() error {
			return k8sClient.Create(context.TODO(), obj)
		}).Should(Succeed())
	}

	JustBeforeEach(func() {
		for _, tnt := range []*capsulev1beta2.Tenant{tntWithAuthorized, tntWithUnauthorized} {
			recreate(tnt)
		}

		for _, class := range []*resources.DeviceClass{authorized, authorized2, unauthorized} {
			recreate(class)
		}
	})

	JustAfterEach(func() {
		for _, tnt := range []*capsulev1beta2.Tenant{tntWithAuthorized, tntWithUnauthorized} {
			EventuallyCreation(func() error {
				return ignoreNotFound(k8sClient.Delete(context.TODO(), tnt))
			}).Should(Succeed())
		}

		// Remove every DeviceClass carrying an "env" label, i.e. the ones created above.
		Eventually(func() error {
			hasEnv, _ := labels.NewRequirement("env", selection.Exists, nil)
			deleteOpts := &client.DeleteAllOfOptions{
				ListOptions: client.ListOptions{
					LabelSelector: labels.NewSelector().Add(*hasEnv),
				},
			}

			return k8sClient.DeleteAllOf(context.TODO(), &resources.DeviceClass{}, deleteOpts)
		}, defaultTimeoutInterval, defaultPollInterval).Should(Succeed())
	})

	// NOTE(review): every Eventually(...).ShouldNot(Succeed()) below is satisfied
	// by the first poll returning any error. If an earlier poll had created the
	// object, a later AlreadyExists error would satisfy it too — confirm that an
	// admission rejection is what is actually observed.

	It("ResourceClaims", func() {
		By("Verify Status (Creation)", func() {
			Eventually(availableDeviceClasses, defaultTimeoutInterval, defaultPollInterval).
				Should(ConsistOf(authorized.GetName(), authorized2.GetName()))
		})

		namespace := tenantNamespace()

		By("providing authorized device class", func() {
			Eventually(
				createClaim(namespace, authorized, reqAuthorized),
				defaultTimeoutInterval, defaultPollInterval,
			).Should(Succeed())
		})

		By("providing unauthorized device class", func() {
			Eventually(
				createClaim(namespace, unauthorized, reqUnauthorized),
				defaultTimeoutInterval, defaultPollInterval,
			).ShouldNot(Succeed())
		})

		By("providing non-existent device class", func() {
			Eventually(
				createClaim(namespace, unauthorized, request{
					name:  "missing-device-class-resource-claim",
					class: "gpu53.example.com",
				}),
				defaultTimeoutInterval, defaultPollInterval,
			).ShouldNot(Succeed())
		})

		By("Verify Status (Deletion)", func() {
			Expect(ignoreNotFound(k8sClient.Delete(context.TODO(), authorized))).To(Succeed())

			// NOTE(review): this only requires the status to differ from the
			// original pair; it does not pin the remaining class — confirm whether
			// a positive assertion is wanted.
			Eventually(availableDeviceClasses, defaultTimeoutInterval, defaultPollInterval).
				ShouldNot(ConsistOf(authorized.GetName(), authorized2.GetName()))
		})
	})

	It("ResourceClaimTemplates", func() {
		namespace := tenantNamespace()

		By("providing authorized device class", func() {
			Eventually(
				createTemplate(namespace, authorized, reqAuthorized),
				defaultTimeoutInterval, defaultPollInterval,
			).Should(Succeed())
		})

		By("providing unauthorized device class", func() {
			Eventually(
				createTemplate(namespace, unauthorized, reqUnauthorized),
				defaultTimeoutInterval, defaultPollInterval,
			).ShouldNot(Succeed())
		})

		By("providing both authorized and unauthorized device classes", func() {
			Eventually(
				createTemplate(namespace, unauthorized, reqUnauthorized, reqAuthorized),
				defaultTimeoutInterval, defaultPollInterval,
			).ShouldNot(Succeed())
		})

		By("providing authorized and missing device classes", func() {
			Eventually(
				createTemplate(namespace, unauthorized,
					request{name: "missing-device-class-resource-claim", class: "gpu63.example.com"},
					reqAuthorized,
				),
				defaultTimeoutInterval, defaultPollInterval,
			).ShouldNot(Succeed())
		})

		By("providing two authorized device classes", func() {
			// NOTE(review): both requested classes are labelled env=authorized, yet
			// this step expects the creation not to succeed — confirm the intended
			// expectation.
			Eventually(
				createTemplate(namespace, unauthorized, reqSecondAuthorized, reqAuthorized),
				defaultTimeoutInterval, defaultPollInterval,
			).ShouldNot(Succeed())
		})
	})
})

1
go.mod
View File

@@ -19,6 +19,7 @@ require (
k8s.io/apimachinery v0.34.2
k8s.io/apiserver v0.34.2
k8s.io/client-go v0.34.2
k8s.io/dynamic-resource-allocation v0.34.2
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4
sigs.k8s.io/cluster-api v1.11.3
sigs.k8s.io/controller-runtime v0.22.4

2
go.sum
View File

@@ -319,6 +319,8 @@ k8s.io/cluster-bootstrap v0.33.3 h1:u2NTxJ5CFSBFXaDxLQoOWMly8eni31psVso+caq6uwI=
k8s.io/cluster-bootstrap v0.33.3/go.mod h1:p970f8u8jf273zyQ5raD8WUu2XyAl0SAWOY82o7i/ds=
k8s.io/component-base v0.34.2 h1:HQRqK9x2sSAsd8+R4xxRirlTjowsg6fWCPwWYeSvogQ=
k8s.io/component-base v0.34.2/go.mod h1:9xw2FHJavUHBFpiGkZoKuYZ5pdtLKe97DEByaA+hHbM=
k8s.io/dynamic-resource-allocation v0.34.2 h1:SjlRGSWl6CZXoJwQNL+Y0wRfdH8PkJ4mHRNK6MMj0bY=
k8s.io/dynamic-resource-allocation v0.34.2/go.mod h1:ul6I+gfrCmC+OCuVdN0/iykyB2sPrIqh2WyKQ3RQPCU=
k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
k8s.io/kube-openapi v0.0.0-20250910181357-589584f1c912 h1:Y3gxNAuB0OBLImH611+UDZcmKS3g6CthxToOb37KgwE=

View File

@@ -12,6 +12,7 @@ import (
networkingv1 "k8s.io/api/networking/v1"
nodev1 "k8s.io/api/node/v1"
rbacv1 "k8s.io/api/rbac/v1"
resources "k8s.io/api/resource/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
storagev1 "k8s.io/api/storage/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -72,6 +73,15 @@ func (r *Manager) SetupWithManager(mgr ctrl.Manager, ctrlConfig utils.Controller
&corev1.Namespace{},
handler.EnqueueRequestForOwner(mgr.GetScheme(), mgr.GetRESTMapper(), &capsulev1beta2.Tenant{}),
).
Watches(
&resources.DeviceClass{},
r.statusOnlyHandlerClasses(
r.reconcileClassStatus,
r.collectAvailableDeviceClasses,
"cannot collect device classes",
),
builder.WithPredicates(utils.UpdatedMetadataPredicate),
).
Watches(
&storagev1.StorageClass{},
r.statusOnlyHandlerClasses(

View File

@@ -9,6 +9,7 @@ import (
"sort"
nodev1 "k8s.io/api/node/v1"
resources "k8s.io/api/resource/v1"
schedulingv1 "k8s.io/api/scheduling/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/meta"
@@ -72,6 +73,14 @@ func (r Manager) reconcileClassStatus(
func (r *Manager) collectAvailableResources(ctx context.Context, tnt *capsulev1beta2.Tenant) (err error) {
log := log.FromContext(ctx)
log.V(5).Info("collecting available deviceclasses")
if err = r.collectAvailableDeviceClasses(ctx, tnt); err != nil {
return err
}
log.V(5).Info("collected available deviceclasses", "size", len(tnt.Status.Classes.DeviceClasses))
log.V(5).Info("collecting available storageclasses")
if err = r.collectAvailableStorageClasses(ctx, tnt); err != nil {
@@ -101,6 +110,19 @@ func (r *Manager) collectAvailableResources(ctx context.Context, tnt *capsulev1b
return nil
}
// collectAvailableDeviceClasses stores in tnt.Status.Classes.DeviceClasses the
// names returned by listObjectNamesBySelector2 for the Tenant's
// spec.deviceClasses, and returns any error from that lookup.
func (r *Manager) collectAvailableDeviceClasses(ctx context.Context, tnt *capsulev1beta2.Tenant) (err error) {
	tnt.Status.Classes.DeviceClasses, err = listObjectNamesBySelector2(
		ctx,
		r.Client,
		tnt.Spec.DeviceClasses,
		&resources.DeviceClassList{},
	)

	return err
}
func (r *Manager) collectAvailableStorageClasses(ctx context.Context, tnt *capsulev1beta2.Tenant) (err error) {
if tnt.Status.Classes.StorageClasses, err = listObjectNamesBySelector(
ctx,
@@ -246,3 +268,92 @@ func listObjectNamesBySelector(
return objects, nil
}
// listObjectNamesBySelector2 lists objects into list (using opts) and returns
// the sorted names of those permitted by allowed.
//
// When allowed is nil, or has neither label-selector terms nor exact names,
// the name of every listed object is returned. Otherwise a name is returned
// when the object's labels match the label selector, or when the name appears
// in allowed.Exact and an object with that name was listed.
//
// NOTE(review): only the label selector and allowed.Exact are evaluated here;
// confirm no other matching field of the spec is expected to take part.
func listObjectNamesBySelector2(
	ctx context.Context,
	c client.Client,
	allowed *api.SelectorAllowedListSpec,
	list client.ObjectList,
	opts ...client.ListOption,
) ([]string, error) {
	if err := c.List(ctx, list, opts...); err != nil {
		return nil, err
	}

	items, err := meta.ExtractList(list)
	if err != nil {
		return nil, err
	}

	bySelector := allowed != nil &&
		(len(allowed.MatchLabels) > 0 || len(allowed.MatchExpressions) > 0)

	// No usable restriction: every listed object is available.
	if allowed == nil || (!bySelector && len(allowed.Exact) == 0) {
		names := make([]string, 0, len(items))

		for _, item := range items {
			acc, accErr := meta.Accessor(item)
			if accErr != nil {
				return nil, accErr
			}

			names = append(names, acc.GetName())
		}

		sort.Strings(names)

		return names, nil
	}

	// Convert the label selector once, before looking at any object.
	var selector labels.Selector
	if bySelector {
		if selector, err = metav1.LabelSelectorAsSelector(&allowed.LabelSelector); err != nil {
			return nil, err
		}
	}

	existing := make(map[string]struct{}, len(items))
	chosen := make(map[string]struct{})

	for _, item := range items {
		acc, accErr := meta.Accessor(item)
		if accErr != nil {
			return nil, accErr
		}

		name := acc.GetName()
		existing[name] = struct{}{}

		if bySelector && selector.Matches(labels.Set(acc.GetLabels())) {
			chosen[name] = struct{}{}
		}
	}

	// Exact names count only when an object with that name was listed.
	for _, name := range allowed.Exact {
		if _, ok := existing[name]; ok {
			chosen[name] = struct{}{}
		}
	}

	names := make([]string, 0, len(chosen))
	for name := range chosen {
		names = append(names, name)
	}

	sort.Strings(names)

	return names, nil
}

View File

@@ -0,0 +1,42 @@
// Copyright 2020-2025 Project Capsule Authors
// SPDX-License-Identifier: Apache-2.0
package dra
import (
"fmt"
"github.com/projectcapsule/capsule/internal/webhook/utils"
"github.com/projectcapsule/capsule/pkg/api"
)
// deviceClassForbiddenError reports that a named DeviceClass is not permitted
// for the current Tenant. It keeps the Tenant's allowed-list spec so the
// message can be completed from it.
type deviceClassForbiddenError struct {
	deviceClassName string
	spec            api.SelectorAllowedListSpec
}

// Error builds the message prefix naming the forbidden DeviceClass and passes
// it, together with the spec, to utils.AllowedValuesErrorMessage.
func (e deviceClassForbiddenError) Error() string {
	return utils.AllowedValuesErrorMessage(
		e.spec,
		fmt.Sprintf("Device Class %s is forbidden for the current Tenant: ", e.deviceClassName),
	)
}

// NewDeviceClassForbidden returns an error stating that class is forbidden
// under the given allowed-list spec.
func NewDeviceClassForbidden(class string, spec api.SelectorAllowedListSpec) error {
	forbidden := deviceClassForbiddenError{
		deviceClassName: class,
		spec:            spec,
	}

	return &forbidden
}
// deviceClassUndefinedError reports that the selected DeviceClass is forbidden
// for the current Tenant or does not exist. It keeps the Tenant's allowed-list
// spec so the message can be completed from it.
type deviceClassUndefinedError struct {
	spec api.SelectorAllowedListSpec
}

// NewDeviceClassUndefined returns an error for a DeviceClass that is forbidden
// or missing under the given allowed-list spec.
func NewDeviceClassUndefined(spec api.SelectorAllowedListSpec) error {
	undefined := deviceClassUndefinedError{spec: spec}

	return &undefined
}

// Error passes the fixed message prefix and the spec to
// utils.AllowedValuesErrorMessage.
func (e deviceClassUndefinedError) Error() string {
	const prefix = "Selected DeviceClass is forbidden for the current Tenant or does not exist. Specify a device Class which is allowed by "

	return utils.AllowedValuesErrorMessage(e.spec, prefix)
}

View File

@@ -0,0 +1,109 @@
// Copyright 2020-2025 Project Capsule Authors
// SPDX-License-Identifier: Apache-2.0
package dra
import (
"context"
"net/http"
corev1 "k8s.io/api/core/v1"
resources "k8s.io/api/resource/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
capsulewebhook "github.com/projectcapsule/capsule/internal/webhook"
"github.com/projectcapsule/capsule/internal/webhook/utils"
"github.com/projectcapsule/capsule/pkg/utils/tenant"
)
// deviceClass is the stateless webhook handler that validates the
// DeviceClasses referenced by ResourceClaim and ResourceClaimTemplate objects.
type deviceClass struct{}
// DeviceClass returns a new DeviceClass validation handler.
func DeviceClass() capsulewebhook.Handler {
	return new(deviceClass)
}
// OnCreate returns the admission function run on create requests.
//
// For kinds "ResourceClaim" and "ResourceClaimTemplate" the object is decoded
// and its device requests are handed to validateResourceRequest together with
// the object's namespace. A decode failure yields an errored response. Any
// other kind yields nil.
func (h *deviceClass) OnCreate(c client.Client, decoder admission.Decoder, recorder record.EventRecorder) capsulewebhook.Func {
	return func(ctx context.Context, req admission.Request) *admission.Response {
		var (
			namespace string
			requests  []resources.DeviceRequest
		)

		switch req.Kind.Kind {
		case "ResourceClaim":
			claim := &resources.ResourceClaim{}
			if err := decoder.Decode(req, claim); err != nil {
				return utils.ErroredResponse(err)
			}

			namespace = claim.Namespace
			requests = claim.Spec.Devices.Requests
		case "ResourceClaimTemplate":
			template := &resources.ResourceClaimTemplate{}
			if err := decoder.Decode(req, template); err != nil {
				return utils.ErroredResponse(err)
			}

			namespace = template.Namespace
			requests = template.Spec.Spec.Devices.Requests
		default:
			return nil
		}

		return h.validateResourceRequest(ctx, c, decoder, recorder, req, namespace, requests)
	}
}
// OnDelete returns an admission function that performs no validation: it
// always returns nil.
func (h *deviceClass) OnDelete(client.Client, admission.Decoder, record.EventRecorder) capsulewebhook.Func {
	noop := func(context.Context, admission.Request) *admission.Response {
		return nil
	}

	return noop
}
// OnUpdate returns an admission function that performs no validation: it
// always returns nil.
func (h *deviceClass) OnUpdate(client.Client, admission.Decoder, record.EventRecorder) capsulewebhook.Func {
	noop := func(context.Context, admission.Request) *admission.Response {
		return nil
	}

	return noop
}
// validateResourceRequest checks every DeviceClass referenced by requests
// against the DeviceClasses allowed by the Tenant that owns namespace.
//
// It returns nil when the namespace does not resolve to a Tenant, when the
// Tenant declares no DeviceClasses restriction, or when every referenced class
// is allowed. It returns an errored response when the Tenant or a DeviceClass
// lookup fails, and a denied response (after recording a warning event on the
// Tenant) as soon as one referenced class is missing or not allowed.
func (h *deviceClass) validateResourceRequest(ctx context.Context, c client.Client, _ admission.Decoder, recorder record.EventRecorder, req admission.Request, namespace string, requests []resources.DeviceRequest) *admission.Response {
	tnt, err := tenant.TenantByStatusNamespace(ctx, c, namespace)
	if err != nil {
		return utils.ErroredResponse(err)
	}

	if tnt == nil {
		return nil
	}

	allowed := tnt.Spec.DeviceClasses
	if allowed == nil {
		return nil
	}

	for i := range requests {
		for _, className := range requestedDeviceClassNames(&requests[i]) {
			dc, err := utils.GetDeviceClassByName(ctx, c, className)
			if err != nil && !k8serrors.IsNotFound(err) {
				response := admission.Errored(http.StatusInternalServerError, err)

				return &response
			}

			// A NotFound lookup leaves dc nil: treat it as an undefined class.
			if dc == nil {
				recorder.Eventf(tnt, corev1.EventTypeWarning, "MissingDeviceClass", "%s %s/%s is missing DeviceClass", req.Kind.Kind, req.Namespace, req.Name)

				response := admission.Denied(NewDeviceClassUndefined(*allowed).Error())

				return &response
			}

			selector := allowed.SelectorMatch(dc)
			if allowed.Match(dc.Name) || selector {
				// Keep going: every remaining request must be validated too,
				// otherwise one allowed class would let forbidden ones through.
				continue
			}

			recorder.Eventf(tnt, corev1.EventTypeWarning, "ForbiddenDeviceClass", "%s %s/%s DeviceClass %s is forbidden for the current Tenant", req.Kind.Kind, req.Namespace, req.Name, dc.Name)

			response := admission.Denied(NewDeviceClassForbidden(dc.Name, *allowed).Error())

			return &response
		}
	}

	return nil
}

// requestedDeviceClassNames lists the DeviceClass names referenced by a single
// DeviceRequest: the class of its Exactly request when set, followed by the
// class of each FirstAvailable subrequest.
func requestedDeviceClassNames(dr *resources.DeviceRequest) []string {
	names := make([]string, 0, 1+len(dr.FirstAvailable))

	// Exactly is a pointer and is unset when the request uses FirstAvailable.
	if dr.Exactly != nil {
		names = append(names, dr.Exactly.DeviceClassName)
	}

	for i := range dr.FirstAvailable {
		names = append(names, dr.FirstAvailable[i].DeviceClassName)
	}

	return names
}

View File

@@ -0,0 +1,24 @@
// Copyright 2020-2025 Project Capsule Authors
// SPDX-License-Identifier: Apache-2.0
package route
import (
capsulewebhook "github.com/projectcapsule/capsule/internal/webhook"
)
// deviceClass is the webhook route served at the path returned by GetPath.
type deviceClass struct {
	// handlers are the handlers returned by GetHandlers for this route.
	handlers []capsulewebhook.Handler
}
// DeviceClass builds the device-class webhook route backed by the given
// handlers.
func DeviceClass(handler ...capsulewebhook.Handler) capsulewebhook.Webhook {
	route := new(deviceClass)
	route.handlers = handler

	return route
}
// GetHandlers returns the handlers this route was constructed with.
func (w *deviceClass) GetHandlers() []capsulewebhook.Handler {
	registered := w.handlers

	return registered
}
// GetPath returns the URL path of this webhook route.
func (w *deviceClass) GetPath() string {
	const path = "/devices"

	return path
}

View File

@@ -20,6 +20,10 @@ func ErroredResponse(err error) *admission.Response {
}
// DefaultAllowedValuesErrorMessage renders the allowed-values message for a
// DefaultAllowedListSpec by delegating to AllowedValuesErrorMessage with its
// SelectorAllowedListSpec and the given err prefix.
func DefaultAllowedValuesErrorMessage(allowed api.DefaultAllowedListSpec, err string) string {
	selectorSpec := allowed.SelectorAllowedListSpec

	return AllowedValuesErrorMessage(selectorSpec, err)
}
func AllowedValuesErrorMessage(allowed api.SelectorAllowedListSpec, err string) string {
var extra []string
if len(allowed.Exact) > 0 {
extra = append(extra, fmt.Sprintf("use one from the following list (%s)", strings.Join(allowed.Exact, ", ")))

View File

@@ -9,6 +9,7 @@ import (
networkingv1 "k8s.io/api/networking/v1"
networkingv1beta1 "k8s.io/api/networking/v1beta1"
resources "k8s.io/api/resource/v1"
schedulev1 "k8s.io/api/scheduling/v1"
storagev1 "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/types"
@@ -77,6 +78,16 @@ func GetGatewayClassClassByObjectName(ctx context.Context, c client.Client, gate
return gatewayClass, nil
}
// GetDeviceClassByName fetches the DeviceClass with the given name; the lookup
// key carries a name only, no namespace.
//
// Any error reported by the client, including a not-found error, is returned
// unchanged together with a nil class, so callers decide how to treat a
// missing class.
func GetDeviceClassByName(ctx context.Context, c client.Client, name string) (*resources.DeviceClass, error) {
	key := types.NamespacedName{Name: name}
	found := new(resources.DeviceClass)

	err := c.Get(ctx, key, found)
	if err != nil {
		return nil, err
	}

	return found, nil
}
// IsDefaultPriorityClass checks if the given PriorityClass is cluster default.
func IsDefaultPriorityClass(class *schedulev1.PriorityClass) bool {
if class != nil {