From 8bfda765f182090e8e6889400ff5d66e3211a980 Mon Sep 17 00:00:00 2001
From: Qing Hao
Date: Fri, 25 Jul 2025 10:09:50 +0800
Subject: [PATCH] update kueue integration solution with kueue addon (#1038)

Signed-off-by: Qing Hao
---
 solutions/kueue-admission-check/README.md     | 547 ++++++------------
 .../env/clusterpermission.yaml                | 144 -----
 solutions/kueue-admission-check/env/msa.yaml  |   7 -
 ...multicluster.x-k8s.io_clusterprofiles.yaml | 219 -------
 .../env/patch-clusterrole.json                |  65 ---
 .../env/patch-mg-sa-cma.json                  |  18 -
 .../kueue-admission-check/env/placement.yaml  |  14 -
 .../env/single-clusterqueue-setup-mwrs.yaml   |  90 ---
 .../kueue-admission-check/job-demo1.yaml      |   2 +-
 .../kueue-admission-check/job-demo2.yaml      |   2 +-
 .../multikueue-setup-demo1.yaml               |  28 +-
 .../multikueue-setup-demo2.yaml               |  18 +-
 .../placement-demo2-1.yaml                    |   4 +-
 .../placement-demo2-2.yaml                    |   4 +-
 solutions/kueue-admission-check/setup-env.sh  | 229 ++++----
 15 files changed, 342 insertions(+), 1049 deletions(-)
 delete mode 100644 solutions/kueue-admission-check/env/clusterpermission.yaml
 delete mode 100644 solutions/kueue-admission-check/env/msa.yaml
 delete mode 100644 solutions/kueue-admission-check/env/multicluster.x-k8s.io_clusterprofiles.yaml
 delete mode 100644 solutions/kueue-admission-check/env/patch-clusterrole.json
 delete mode 100644 solutions/kueue-admission-check/env/patch-mg-sa-cma.json
 delete mode 100644 solutions/kueue-admission-check/env/placement.yaml
 delete mode 100644 solutions/kueue-admission-check/env/single-clusterqueue-setup-mwrs.yaml

diff --git a/solutions/kueue-admission-check/README.md b/solutions/kueue-admission-check/README.md
index 9c8766f6c..03f709a32 100644
--- a/solutions/kueue-admission-check/README.md
+++ b/solutions/kueue-admission-check/README.md
@@ -1,4 +1,4 @@
-# Set up Multikueue with OCM Kueue Admission Check Controller
+# Set up MultiKueue with Open Cluster Management
 
 This guide demonstrates how to use the external OCM [Kueue Admission Check Controller](https://kueue.sigs.k8s.io/docs/concepts/admission_check/) which integrates OCM `Placement` results with [MultiKueue](https://kueue.sigs.k8s.io/docs/concepts/multikueue/) for intelligent multi-cluster job scheduling. The controller reads OCM `Placement` decisions and generates corresponding `MultiKueueConfig` and `MultiKueueCluster` resources, streamlining the setup of the [MultiKueue](https://kueue.sigs.k8s.io/docs/concepts/multikueue/) environment and enabling users to select clusters based on custom criteria.
@@ -33,91 +33,61 @@ REF: [Setup a MultiKueue environment](https://kueue.sigs.k8s.io/docs/tasks/manag
 1. A Kubernetes environment with OCM installed on a hub cluster and at least three managed clusters.
 2. [Kueue](https://kueue.sigs.k8s.io/docs/installation/) deployed across all clusters.
 3. [Managed-serviceaccount](https://github.com/open-cluster-management-io/managed-serviceaccount), [cluster-permission](https://github.com/open-cluster-management-io/cluster-permission) and [resource-usage-collect-addon](https://github.com/open-cluster-management-io/addon-contrib/tree/main/resource-usage-collect-addon) installed on managed clusters.
+4. [Kueue-addon](https://github.com/open-cluster-management-io/addon-contrib/tree/main/kueue-addon) installed on managed clusters.
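+
+If `clusteradm` is not installed yet, one way to get it is the upstream install script from the [clusteradm](https://github.com/open-cluster-management-io/clusteradm) README:
+
+```bash
+curl -L https://raw.githubusercontent.com/open-cluster-management-io/clusteradm/main/install.sh | bash
+```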
+
+You can set up all of the above by running the following command:
 
-- You can set up these above by running the command:
 ```bash
 ./setup-env.sh
 ```
-**Notice**: Currently, this functionality relies on the support of `ClusterProfile` and the user's manual installation of the Admission Check Controller.
-OCM achieves this by replacing some OCM images in this `setup-env.sh`. In the future, we plan to address the items listed in the [TODO section](#todo).
-
-After that, you can verify your setup.
-
 - Check the managed clusters.
+
 ```bash
 kubectl get mcl
-NAME       HUB ACCEPTED   MANAGED CLUSTER URLS                  JOINED   AVAILABLE   AGE
-cluster1   true           https://cluster1-control-plane:6443   True     True        116s
-cluster2   true           https://cluster2-control-plane:6443   True     True        94s
-cluster3   true           https://cluster3-control-plane:6443   True     True        73s
+NAME            HUB ACCEPTED   MANAGED CLUSTER URLS                       JOINED   AVAILABLE   AGE
+cluster1        true           https://cluster1-control-plane:6443        True     True        11m
+cluster2        true           https://cluster2-control-plane:6443        True     True        10m
+cluster3        true           https://cluster3-control-plane:6443        True     True        10m
+local-cluster   true           https://local-cluster-control-plane:6443   True     True        11m
 ```
+
 - Verify the installed addons.
+
 ```bash
 kubectl get mca -A
-NAMESPACE   NAME                     AVAILABLE   DEGRADED   PROGRESSING
-cluster1    managed-serviceaccount   True                   False
-cluster1    resource-usage-collect   True                   False
-cluster2    managed-serviceaccount   True                   False
-cluster2    resource-usage-collect   True                   False
-cluster3    managed-serviceaccount   True                   False
-cluster3    resource-usage-collect   True                   False
+NAMESPACE       NAME                     AVAILABLE   DEGRADED   PROGRESSING
+cluster1        kueue-addon              True                   False
+cluster1        managed-serviceaccount   True                   False
+cluster1        resource-usage-collect   True                   False
+cluster2        kueue-addon              True                   False
+cluster2        managed-serviceaccount   True                   False
+cluster2        resource-usage-collect   True                   False
+cluster3        kueue-addon              True                   False
+cluster3        managed-serviceaccount   True                   False
+cluster3        resource-usage-collect   True                   False
+local-cluster   managed-serviceaccount   True                   False
+local-cluster   resource-usage-collect   True                   False
 ```
+
 - Confirm Kueue is running on the clusters.
+
 ```bash
-kubectl get pods -n kueue-system --context kind-hub # Same for managed clusters.
+kubectl get pods -n kueue-system --context kind-local-cluster # Same for managed clusters.
 NAME                                       READY   STATUS    RESTARTS   AGE
 kueue-controller-manager-87bd7888b-gqk4g   2/2     Running   0          69s
 ```
-- On the hub cluster, check `ClusterProfiles`.
-```bash
-kubectl get clusterprofile -A
-NAMESPACE                 NAME       AGE
-open-cluster-management   cluster1   23s
-open-cluster-management   cluster2   23s
-open-cluster-management   cluster3   23s
-```
-- The `ClusterProfile` status contains credentials that Kueue can use.
-```bash
-kubectl get clusterprofile -A -ojson | jq '.items[] | .metadata.name, .status.credentials[]'
-"cluster1"
-{
-  "accessRef": {
-    "kind": "Secret",
-    "name": "kueue-admin-cluster1-kubeconfig",
-    "namespace": "kueue-system"
-  },
-  "consumer": "kueue-admin"
-}
-"cluster2"
-{
-  "accessRef": {
-    "kind": "Secret",
-    "name": "kueue-admin-cluster2-kubeconfig",
-    "namespace": "kueue-system"
-  },
-  "consumer": "kueue-admin"
-}
-"cluster3"
-{
-  "accessRef": {
-    "kind": "Secret",
-    "name": "kueue-admin-cluster3-kubeconfig",
-    "namespace": "kueue-system"
-  },
-  "consumer": "kueue-admin"
-
-}
-```
- - On the hub cluster, check the secrets containing the `kubeconfig` for each managed cluster, created under the `kueue-system` namespace.
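+Each of these secrets is expected to hold the worker cluster's kubeconfig under the `kubeconfig` key, which is the key Kueue's `MultiKueueCluster` reads; assuming that layout, one secret can be spot-checked with:
+
+```bash
+kubectl get secret multikueue-cluster1 -n kueue-system -o jsonpath='{.data.kubeconfig}' | base64 -d | head -n 5
+```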
+
 ```bash
 kubectl get secret -n kueue-system
-NAME                              TYPE     DATA   AGE
-kueue-admin-cluster1-kubeconfig   Opaque   1      4m4s
-kueue-admin-cluster2-kubeconfig   Opaque   1      4m4s
-kueue-admin-cluster3-kubeconfig   Opaque   1      4m4s
-kueue-webhook-server-cert         Opaque   4      5m27s
+NAME                        TYPE     DATA   AGE
+kueue-webhook-server-cert   Opaque   4      5m12s
+multikueue-cluster1         Opaque   1      3m38s
+multikueue-cluster2         Opaque   1      3m38s
+multikueue-cluster3         Opaque   1      3m38s
+multikueue-local-cluster    Opaque   1      3m38s
 ```
 
 ## User Stories
 
@@ -126,24 +96,27 @@ kueue-webhook-server-cert         Opaque   4      5m27s
 
 #### Story 1
 
 As an admin, I want to automate [MultiKueue](https://kueue.sigs.k8s.io/docs/concepts/multikueue/) configuration across multiple clusters, so that I can streamline the setup process without manual intervention.
 
-- With the help of the `ClusterProfile` API, we can easily set up MultiKueue environment.
+- With the secrets under `kueue-system` automatically created, we can easily set up the MultiKueue environment.
+
 ```bash
 kubectl apply -f ./multikueue-setup-demo1.yaml
 ```
 
-- After that, check the status of `MultiKueueCluster`, `AdmissionChecks` and `Clusterqueues`
+- After that, check the status of the `MultiKueueCluster`, `AdmissionCheck`, and `ClusterQueue` resources.
 
 ```bash
-kubectl get multikueuecluster -A -ojson | jq '.items[] | .metadata.name, .status.conditions'
+kubectl get multikueueclusters -A -ojson | jq '.items[] | .metadata.name, .status.conditions'
 kubectl get admissionchecks -ojson | jq '.items[] | .metadata.name, .status.conditions'
 kubectl get clusterqueues -ojson | jq '.items[] | .metadata.name, .status.conditions'
 ```
 
 Success is indicated when "status": "True" and reasons like "Active" or "Ready" are present in the conditions.
 
 ```bash
-"multikueue-demo1-cluster1"
+"multikueue-config-demo1-cluster1"
 [
   {
-    "lastTransitionTime": "2024-08-31T20:41:41Z",
+    "lastTransitionTime": "2025-05-29T11:23:17Z",
     "message": "Connected",
     "observedGeneration": 1,
     "reason": "Active",
     "status": "True",
     "type": "Active"
   }
 ]
-"multikueue-demo1-cluster2"
+"multikueue-config-demo1-cluster2"
 [
   {
-    "lastTransitionTime": "2024-08-31T20:41:41Z",
+    "lastTransitionTime": "2025-05-29T11:23:17Z",
     "message": "Connected",
     "observedGeneration": 1,
     "reason": "Active",
     "status": "True",
     "type": "Active"
   }
 ]
 "multikueue-demo1"
 [
   {
-    "lastTransitionTime": "2024-08-31T20:41:41Z",
+    "lastTransitionTime": "2025-05-29T11:23:17Z",
     "message": "The admission check is active",
     "observedGeneration": 1,
     "reason": "Active",
     "status": "True",
     "type": "Active"
-  },
-  {
-    "lastTransitionTime": "2024-08-31T20:41:41Z",
-    "message": "only one multikueue managed admission check can be used in one ClusterQueue",
-    "observedGeneration": 1,
-    "reason": "MultiKueue",
-    "status": "True",
-    "type": "SingleInstanceInClusterQueue"
-  },
-  {
-    "lastTransitionTime": "2024-08-31T20:41:41Z",
-    "message": "admission check cannot be applied at ResourceFlavor level",
-    "observedGeneration": 1,
-    "reason": "MultiKueue",
-    "status": "True",
-    "type": "FlavorIndependent"
   }
 ]
-"cluster-queue-demo1"
+"cluster-queue"
 [
   {
-    "lastTransitionTime": "2024-08-31T20:41:41Z",
+    "lastTransitionTime": "2025-05-29T11:23:17Z",
     "message": "Can admit new workloads",
     "observedGeneration": 1,
     "reason": "Ready",
     "status": "True",
     "type": "Ready"
   }
 ]
 ```
+
 - Deploy a job to the MultiKueue environment.
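+
+For reference, `job-demo1.yaml` is a plain batch `Job` pointed at the `user-queue` `LocalQueue` via the `kueue.x-k8s.io/queue-name` label. The metadata below mirrors the manifest in this patch; the pod template is an illustrative sketch only:
+
+```yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+  generateName: demo1-job
+  namespace: default
+  labels:
+    kueue.x-k8s.io/queue-name: "user-queue"
+spec:
+  parallelism: 1
+  completions: 1
+  suspend: true              # Kueue lifts the suspension once the workload is admitted
+  template:
+    spec:
+      containers:
+      - name: demo           # illustrative container; the repo manifest may differ
+        image: busybox
+        command: ["sleep", "30"]
+      restartPolicy: Never
+```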
 ```bash
 kubectl create -f ./job-demo1.yaml
 ```
+
-- Check the workload on the managed clusters. Here when the job’s Workload receives a QuotaReservation in the manager cluster, a copy of the Workload is created in all configured worker clusters.
-Once `kind-cluster1` admitted the workload, the manager removed the corresponding workloads from the other clusters(`kind-cluster2`).
+- Check the workload on the managed clusters. Here, when the job's Workload receives a QuotaReservation in the manager cluster, a copy of the Workload is created in all configured worker clusters. Once `kind-cluster1` admits the workload, the manager removes the corresponding workloads from the other clusters (e.g., `kind-cluster2`).
+
 ```bash
 kubectl get workload --context kind-cluster1
 NAME                       QUEUE        RESERVED IN     ADMITTED   AGE
 job-demo1-jobnktc6-6c5f3   user-queue   cluster-queue   True       5s
 
 kubectl get workload --context kind-cluster2
 No resources found in default namespace. # After cluster1 admitted the workload, no workload should show up here.
 ```
+
 #### Story 2
 
 As an admin, I want to use OCM `Placement` results for scheduling, so that clusters with specific attributes, like those with the `nvidia-t4` GPU accelerator label, are automatically selected and converted into a [MultiKueue](https://kueue.sigs.k8s.io/docs/concepts/multikueue/) for targeted workload deployment.
 
-- You can manually label the accelerators on the clusters.
+- Clean up the resources from demo1.
+
 ```bash
-kubectl label managedcluster cluster2 accelerator=nvidia-tesla-t4
-kubectl label managedcluster cluster3 accelerator=nvidia-tesla-t4
+kubectl delete -f ./multikueue-setup-demo1.yaml
 ```
-The `placememt-demo2-1.yaml` selects clusters with the `nvidia-tesla-t4` accelerator label.
+
+- If your environment was set up by `setup-env.sh`, you will see cluster2 and cluster3 with the label `accelerator=nvidia-tesla-t4`, each with 3 fake GPU resources.
+
+```bash
+kubectl get mcl -l accelerator=nvidia-tesla-t4
+NAME       HUB ACCEPTED   MANAGED CLUSTER URLS                  JOINED   AVAILABLE   AGE
+cluster2   true           https://cluster2-control-plane:6443   True     True        37m
+cluster3   true           https://cluster3-control-plane:6443   True     True        37m
+
+kubectl get node -ojson --context kind-cluster2 | jq '.items[] | .status.capacity, .status.allocatable' | grep gpu
+    "nvidia.com/gpu": "3",
+    "nvidia.com/gpu": "3",
+kubectl get node -ojson --context kind-cluster3 | jq '.items[] | .status.capacity, .status.allocatable' | grep gpu
+    "nvidia.com/gpu": "3",
+    "nvidia.com/gpu": "3",
+```
+
+- Bind the cluster set to the Kueue namespace and verify the bindings.
+
+```bash
+clusteradm clusterset bind global --namespace kueue-system
+clusteradm get clustersets
+
+<ManagedClusterSet>
+└── <default>
+│   ├── 4 ManagedClusters selected
+│   ├── [cluster1 cluster2 cluster3 local-cluster]
+│   ├──
+└── <global>
+    └── kueue-system,open-cluster-management-addon
+    └── 4 ManagedClusters selected
+    └── [cluster1 cluster2 cluster3 local-cluster]
+```
+
+- The `placement-demo2-1.yaml` selects clusters with the `nvidia-tesla-t4` accelerator label.
+
 ```yaml
 apiVersion: cluster.open-cluster-management.io/v1beta1
 kind: Placement
 metadata:
-  name: placement-demo2
+  name: multikueue-config-demo2
   namespace: kueue-system
 spec:
-  clusterSets:
-    - spoke
-  tolerations:
-    - key: cluster.open-cluster-management.io/unreachable
-      operator: Exists
-    - key: cluster.open-cluster-management.io/unavailable
-      operator: Exists
+...
   predicates:
     - requiredClusterSelector:
         labelSelector:
           matchLabels:
             accelerator: nvidia-tesla-t4
 ```
-- Bind the cluster set to the Kueue namespace and verify the bindings.
-```bash
-clusteradm clusterset bind spoke --namespace kueue-system
-clusteradm get clustersets
-
-<ManagedClusterSet>
-└── <spoke>
-    └── default,kueue-system
-        └── 3 ManagedClusters selected
-            └── [cluster1 cluster2 cluster3]
-```
-
-- Apply the placement policy.
+  Apply the placement.
 
 ```bash
 kubectl apply -f placement-demo2-1.yaml
@@ -270,30 +247,35 @@ kubectl apply -f placement-demo2-1.yaml
 kubectl apply -f ./multikueue-setup-demo2.yaml
 ```
 
-- Check the `MultikueueKonfig` and `MultikueueClusters`.
+- Check the `MultiKueueConfig` and `MultiKueueCluster` resources.
+
 ```bash
 kubectl get multikueueconfig
 NAME                      AGE
-placement-demo2           60s
+multikueue-config-demo2   10s
 
-kubectl get multikueuecluster
-NAME                       AGE
-placement-demo2-cluster2   60s
-placement-demo2-cluster3   60s
+kubectl get multikueueclusters
+NAME                               AGE
+multikueue-config-demo2-cluster2   19s
+multikueue-config-demo2-cluster3   19s
 ```
+
-- After that, check the status of `MultiKueueCluster`, `AdmissionChecks` and `Clusterqueues`
+- After that, check the status of the `MultiKueueCluster`, `AdmissionCheck`, and `ClusterQueue` resources.
+
 ```bash
-kubectl get multikueuecluster -A -ojson | jq '.items[] | .metadata.name, .status.conditions'
+kubectl get multikueueclusters -A -ojson | jq '.items[] | .metadata.name, .status.conditions'
 kubectl get admissionchecks -ojson | jq '.items[] | .metadata.name, .status.conditions'
 kubectl get clusterqueues -ojson | jq '.items[] | .metadata.name, .status.conditions'
 ```
+
 If successful, "status": "True" with reasons like "Active" or "Ready" should be present in the conditions.
+
 ```bash
-"placement-demo2-cluster2"
+"multikueue-config-demo2-cluster2"
 [
   {
-    "lastTransitionTime": "2024-08-31T22:03:16Z",
+    "lastTransitionTime": "2025-05-29T11:28:34Z",
     "message": "Connected",
     "observedGeneration": 1,
     "reason": "Active",
     "status": "True",
     "type": "Active"
   }
 ]
-"placement-demo2-cluster3"
+"multikueue-config-demo2-cluster3"
 [
   {
-    "lastTransitionTime": "2024-08-31T22:03:16Z",
+    "lastTransitionTime": "2025-05-29T11:28:34Z",
     "message": "Connected",
     "observedGeneration": 1,
     "reason": "Active",
     "status": "True",
     "type": "Active"
   }
 ]
-"multikueue-demo2" # The status of the admissioncheck `multikueue-demo2`
+"multikueue-config-demo2" # The status of the admissioncheck `multikueue-config-demo2`
 [
   {
-    "lastTransitionTime": "2024-08-31T22:03:16Z",
+    "lastTransitionTime": "2025-05-29T11:28:34Z",
+    "message": "MultiKueueConfig multikueue-config-demo2 and MultiKueueClusters are generated successfully",
+    "reason": "Active",
+    "status": "True",
+    "type": "Active"
+  }
+]
+"multikueue-demo2" # The status of the admissioncheck `multikueue-demo2`
+[
+  {
+    "lastTransitionTime": "2025-05-29T11:28:34Z",
     "message": "The admission check is active",
     "observedGeneration": 1,
     "reason": "Active",
     "status": "True",
     "type": "Active"
-  },
-  {
-    "lastTransitionTime": "2024-08-31T22:03:16Z",
-    "message": "only one multikueue managed admission check can be used in one ClusterQueue",
-    "observedGeneration": 1,
-    "reason": "MultiKueue",
-    "status": "True",
-    "type": "SingleInstanceInClusterQueue"
-  },
-  {
-    "lastTransitionTime": "2024-08-31T22:03:16Z",
-    "message": "admission check cannot be applied at ResourceFlavor level",
-    "observedGeneration": 
1,
-    "reason": "MultiKueue",
-    "status": "True",
-    "type": "FlavorIndependent"
-  }
-]
-"placement-demo2" # The status of the admissioncheck `placement-demo2`
-[
-  {
-    "lastTransitionTime": "2024-08-31T22:03:16Z",
-    "message": "MultiKueueConfig and MultiKueueCluster generated",
-    "reason": "Active",
-    "status": "True",
-    "type": "Active"
-  }
-]
-"cluster-queue-demo2"
+"cluster-queue"
 [
   {
-    "lastTransitionTime": "2024-08-31T22:03:16Z",
+    "lastTransitionTime": "2025-05-29T11:28:34Z",
     "message": "Can admit new workloads",
     "observedGeneration": 1,
     "reason": "Ready",
     "status": "True",
     "type": "Ready"
   }
 ]
 ```
+
 - Create a job requesting GPU resources to the MultiKueue.
+
 ```bash
 kubectl create -f ./job-demo2.yaml
 ```
-- Check the workload on managed clusters. Like we explained in the case in story 1, once one cluster(here `kind-cluster3`) has admitted the workload, the manager removed the corresponding workloads from the other clusters(here `kind-cluster2`).
+
+- Check the workload on managed clusters. As explained in Story 1, once one cluster (here `kind-cluster3`) has admitted the workload, the manager removes the corresponding workloads from the other clusters (here `kind-cluster2`).
+
 ```bash
 kubectl get workload --context kind-cluster2
 No resources found in default namespace.
 
 kubectl get workload --context kind-cluster3
-NAME                       QUEUE              RESERVED IN           ADMITTED   AGE
-job-demo2-jobl2t6d-a8cdd   user-queue-demo2   cluster-queue-demo2   True       3s
+NAME                       QUEUE        RESERVED IN     ADMITTED   FINISHED   AGE
+job-demo2-jobfpf8q-58705   user-queue   cluster-queue   True                  5m24s
 ```
+
 #### Story 3
 
 As an admin, I want to leverage OCM's `AddonPlacementScore` for dynamic workload scheduling, so that clusters with higher GPU scores, indicating clusters with more GPU resources, are selected and converted into a [MultiKueue](https://kueue.sigs.k8s.io/docs/concepts/multikueue/), which automatically adjusts by adding or removing clusters as scores change.
 
-`placememt-demo2-2` selects clusters with the `nvidia-tesla-t4` accelerator label, and select one cluster with the highest GPU-score, indicating having more GPU resources.
+- In this environment, cluster1 has no GPUs, while cluster2 and cluster3 each have 3 GPUs. Check `AddonPlacementScore`: the score ranges from -100 to 100, and clusters with more available resources receive higher scores. Here, cluster1, which has no GPUs, should have a score of -100, and the cluster already running the workload from Story 2 (`kind-cluster3`) will have a lower score.
 
-```yaml
-apiVersion: cluster.open-cluster-management.io/v1beta1
-kind: Placement
-metadata:
-  name: placement-demo2
-  namespace: kueue-system
-spec:
-  clusterSets:
-    - spoke
-  tolerations:
-    - key: cluster.open-cluster-management.io/unreachable
-      operator: Exists
-    - key: cluster.open-cluster-management.io/unavailable
-      operator: Exists
-  predicates:
-    - requiredClusterSelector:
-        labelSelector:
-          matchLabels:
-            accelerator: nvidia-tesla-t4
-  numberOfClusters: 1
-  prioritizerPolicy:
-    mode: Exact
-    configurations:
-      - scoreCoordinate:
-          type: AddOn
-          addOn:
-            resourceName: resource-usage-score
-          scoreName: gpuClusterAvailable
-        weight: 1
-```
-- You can manually edit the GPU resources on the managed clusters for testing, for example on `kind-cluster2`, set 3 fake GPU resources on the `control-plane-node`.
-```bash
-kubectl edit-status node cluster2-control-plane --context kind-cluster2 # Same operation with other clusters/nodes.
-```
-- Edit the `status` of the node `cluster2-control-plane`:
-```yaml
-  allocatable:
-    cpu: "8"
-    ephemeral-storage: 61202244Ki
-    hugepages-1Gi: "0"
-    hugepages-2Mi: "0"
-    hugepages-32Mi: "0"
-    hugepages-64Ki: "0"
-    memory: 8027168Ki
-    nvidia.com/gpu: "3" # Add 3 fake GPUs in allocatable
-    pods: "110"
-  capacity:
-    cpu: "8"
-    ephemeral-storage: 61202244Ki
-    hugepages-1Gi: "0"
-    hugepages-2Mi: "0"
-    hugepages-32Mi: "0"
-    hugepages-64Ki: "0"
-    memory: 8027168Ki
-    nvidia.com/gpu: "3" # Add 3 fake GPUs in capacity
-    pods: "110"
-```
-
-- Here in this environment, cluster1 has no GPUs, while cluster2 and cluster3 each have 3 GPUs.
-Check `AddonPlacementScore`, the range of the score is from -100 to 100, clusters with more resources available have higher scores.
-Here cluster1, which has no GPUs, should have a score of -100, and the cluster running the workload(here from story 2 we have one workload running on `kind-cluster3`) will have a lower score.
 ```bash
 kubectl get addonplacementscore -A -ojson | jq '.items[] | .metadata.name, .status.scores[5]'
 "resource-usage-score"
 {
   "name": "gpuClusterAvailable",
   "value": -100
 }
 ...
 }
 ```
 
-- Apply the changes in the `Placement` to update MultiKueue dynamically.
+- The `placement-demo2-2.yaml` selects clusters with the `nvidia-tesla-t4` accelerator label, and selects the one cluster with the highest GPU score, i.e., the cluster with the most GPU resources available.
+
+```yaml
+apiVersion: cluster.open-cluster-management.io/v1beta1
+kind: Placement
+metadata:
+  name: multikueue-config-demo2
+  namespace: kueue-system
+spec:
+...
+  predicates:
+    - requiredClusterSelector:
+        labelSelector:
+          matchLabels:
+            accelerator: nvidia-tesla-t4
+  numberOfClusters: 1
+  prioritizerPolicy:
+    mode: Exact
+    configurations:
+      - scoreCoordinate:
+          type: AddOn
+          addOn:
+            resourceName: resource-usage-score
+          scoreName: gpuClusterAvailable
+        weight: 1
+```
+
+  Apply the changes in the `Placement` to update MultiKueue dynamically.
+
 ```bash
 kubectl apply -f ./placement-demo2-2.yaml
 ```
 
-- Review the update in `MultikueueKonfig`.
+- Review the update in `MultiKueueConfig`.
+
 ```bash
-kubectl get multikueueconfig
-NAME              AGE
-placement-demo2   22m
-
-kubectl get multikueueconfig placement-demo2 -oyaml
-apiVersion: kueue.x-k8s.io/v1alpha1
+kubectl get multikueueconfig multikueue-config-demo2 -oyaml
+apiVersion: kueue.x-k8s.io/v1beta1
 kind: MultiKueueConfig
 metadata:
-  creationTimestamp: "2024-08-31T22:03:16Z"
-  generation: 5
-  name: placement-demo2
-  resourceVersion: "18109"
-  uid: 3c16af72-94bf-4444-bf79-7e896165aabc
+  creationTimestamp: "2025-05-29T11:28:34Z"
+  generation: 7
+  name: multikueue-config-demo2
+  resourceVersion: "11913"
+  uid: da363d4c-c0e8-43b4-a335-a52dc5a3cabf
 spec:
   clusters:
-  - placement-demo2-cluster2 # cluster2 has a higher GPU score, so it got selected by the placement decision.
+  - multikueue-config-demo2-cluster2 # cluster2 has a higher GPU score, so it got selected by the placement decision.
 ```
 
-- Create a job for the updated MultiKueue and check the workload, this time the workload is admitted by `kind-cluster2`, in `kind-cluster3` can only find the old workload from Story 2.
+- Create a job for the updated MultiKueue and check the workload; this time the workload is admitted by `kind-cluster2`. In `kind-cluster3`, you can only find the old workload from Story 2.
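+
+To double-check which cluster the placement picked before creating the job, you can inspect the `PlacementDecision` that OCM generates alongside the `Placement` (the decision list should now contain only the highest-scoring cluster, cluster2):
+
+```bash
+kubectl get placementdecisions -n kueue-system -ojson | jq '.items[] | .metadata.name, .status.decisions'
+```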
+ ```bash kubectl create -f ./job-demo2.yaml kubectl get workload --context kind-cluster2 -NAME QUEUE RESERVED IN ADMITTED AGE -job-demo2-jobxn888-4b91e user-queue-demo2 cluster-queue-demo2 True 6s +NAME QUEUE RESERVED IN ADMITTED FINISHED AGE +job-demo2-jobfxmh7-f4c34 user-queue cluster-queue True 8s kubectl get workload --context kind-cluster3 -NAME QUEUE RESERVED IN ADMITTED AGE -job-demo2-jobl2t6d-a8cdd user-queue-demo2 cluster-queue-demo2 True 9m13s +NAME QUEUE RESERVED IN ADMITTED FINISHED AGE +job-demo2-jobfpf8q-58705 user-queue cluster-queue True 5m24s ``` -## Design Details +## Design Details and Workflow -### OCM Admission Check Controller - -The OCM Admission Check Controller will integrate OCM `Placement` results into MultiKueue by reading `Placement` decisions and generating the necessary `MultiKueueConfig` and `MultiKueueCluster` resources. - -- `controllerName`: Identifies the controller that processes the Admission Check, currently set to `open-cluster-management.io/placement` -- `parameters`: Identifies a configuration with additional parameters for the check, here we add the existing OCM `Placement` component. Clusters specified in the `Placement` will be bound to the `kueue-system` namespace. - -Example OCM Admission Check Controller design: - -```yaml -# OCM implements an admissioncheck controller to automate the MultiKueue setup process. -# MultiKueueConfigs and MultiKueueClusters are generated dynamically based on OCM placement decisions. -apiVersion: kueue.x-k8s.io/v1beta1 -kind: AdmissionCheck -metadata: - name: placement-demo2 -spec: - controllerName: open-cluster-management.io/placement - parameters: - apiGroup: cluster.open-cluster-management.io - kind: Placement - name: placement-demo2 -# Leverages OCM's placement mechanism to select clusters based on specific criteria. -# For example `Placement-demo2-1` selects clusters with the `nvidia-tesla-t4` accelerator label. -``` - -### Changes in the Configuration Process with OCM Admission Check Controller - -Using the OCM Admission Check Controller significantly simplifies the configuration process for system administrators by automating several manual tasks. - -#### Before Using OCM Admission Check Controller - -In the traditional setup, administrators must manually configure both `MultiKueueConfig` and `MultiKueueCluster` resources: - -- **MultiKueueConfig**: Defines which clusters are part of the [MultiKueue](https://kueue.sigs.k8s.io/docs/concepts/multikueue/) environment. Admins need to specify each cluster manually. -- **MultiKueueCluster**: Each cluster requires a `MultiKueueCluster` resource, which includes a kubeconfig secret that administrators must create manually for secure communication. - -```yaml -apiVersion: kueue.x-k8s.io/v1alpha1 -kind: MultiKueueConfig -metadata: - name: multikueue-config -spec: - clusters: - - multikueue-cluster1 - - multikueue-cluster2 ---- -apiVersion: kueue.x-k8s.io/v1alpha1 -kind: MultiKueueCluster -metadata: - name: multikueue-cluster1 -spec: - kubeConfig: - locationType: Secret - location: kueue-admin-cluster1-kubeconfig ---- -apiVersion: kueue.x-k8s.io/v1alpha1 -kind: MultiKueueCluster -metadata: - name: multikueue-cluster2 -spec: - kubeConfig: - locationType: Secret - location: kueue-admin-cluster2-kubeconfig -``` - -#### After Using OCM Admission Check Controller - -With the OCM Admission Check Controller, the need for manual configuration of `MultiKueueConfig` and `MultiKueueCluster` is eliminated. 
Instead, the administrator only needs to configure two additional admission checks in the ClusterQueue resource: -`multikueue-demo2` and `placement-demo2` (see in `multikueue-setup-demo2.yaml`) which leverage OCM's placement mechanism to select clusters based on specific criteria and automate the process of setting up `MultiKueueConfig` and `MultiKueueCluster`. - -```yaml -apiVersion: kueue.x-k8s.io/v1beta1 -kind: ClusterQueue -metadata: - name: "cluster-queue-demo2" -spec: - namespaceSelector: {} # match all. - resourceGroups: - - coveredResources: ["cpu", "memory","nvidia.com/gpu"] - flavors: - - name: "default-flavor-demo2" - resources: - - name: "cpu" - nominalQuota: 9 - - name: "memory" - nominalQuota: 36Gi - - name: "nvidia.com/gpu" - nominalQuota: 3 - admissionChecks: - - multikueue-demo2 - - placement-demo2 ---- -apiVersion: kueue.x-k8s.io/v1beta1 -kind: AdmissionCheck -metadata: - name: multikueue-demo2 -spec: - controllerName: kueue.x-k8s.io/multikueue - parameters: - apiGroup: kueue.x-k8s.io - kind: MultiKueueConfig - name: placement-demo2 ---- -apiVersion: kueue.x-k8s.io/v1beta1 -kind: AdmissionCheck -metadata: - name: placement-demo2 -spec: - controllerName: open-cluster-management.io/placement - parameters: - apiGroup: cluster.open-cluster-management.io - kind: Placement - name: placement-demo2 -``` - -#### OCM Admission Check Controller Workflow - -- The OCM Admission Check Controller retrieves the OCM `Placement` associated with an AdmissionCheck in the `kueue-system` namespace. -- It uses a `PlacementDecisionTracker` to gather the selected clusters and retrieves their `ClusterProfile` for `credentials`. -- The controller creates or updates `MultiKueueCluster` resources with the kubeconfig details for each cluster, and then lists these clusters in a `MultiKueueConfig` resource. -- Finally, it updates the AdmissionCheck condition to true, indicating successful generation of the `MultiKueueConfig` and `MultiKueueCluster`, readying the [MultiKueue](https://kueue.sigs.k8s.io/docs/concepts/multikueue/) environment for job scheduling. - -## TODO -- In the future, the `AdmissionCheckcontroller` may be added to `featureGates` as a user-enabled feature or possibly developed into an individual component running as a pod on the `hub`. \ No newline at end of file +For more detailed design and workflow information, please refer to the [kueue-addon](https://github.com/open-cluster-management-io/addon-contrib/blob/main/kueue-addon/README.md). 
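+
+At a high level, the wiring is two `AdmissionCheck`s on a single `ClusterQueue` (taken from `multikueue-setup-demo2.yaml` in this solution): the `kueue.x-k8s.io/multikueue` check consumes a `MultiKueueConfig`, and the `open-cluster-management.io/placement` check generates that `MultiKueueConfig` (plus the `MultiKueueCluster`s) from the referenced `Placement`:
+
+```yaml
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: AdmissionCheck
+metadata:
+  name: multikueue-demo2
+spec:
+  controllerName: kueue.x-k8s.io/multikueue
+  parameters:
+    apiGroup: kueue.x-k8s.io
+    kind: MultiKueueConfig
+    name: multikueue-config-demo2
+---
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: AdmissionCheck
+metadata:
+  name: multikueue-config-demo2
+spec:
+  controllerName: open-cluster-management.io/placement
+  parameters:
+    apiGroup: cluster.open-cluster-management.io
+    kind: Placement
+    name: multikueue-config-demo2
+```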
diff --git a/solutions/kueue-admission-check/env/clusterpermission.yaml b/solutions/kueue-admission-check/env/clusterpermission.yaml deleted file mode 100644 index 86602c9fb..000000000 --- a/solutions/kueue-admission-check/env/clusterpermission.yaml +++ /dev/null @@ -1,144 +0,0 @@ -# the permission is copied from https://kueue.sigs.k8s.io/docs/tasks/manage/setup_multikueue/ -apiVersion: rbac.open-cluster-management.io/v1alpha1 -kind: ClusterPermission -metadata: - name: kueue-admin-CLUSTER_NAME - namespace: CLUSTER_NAME -spec: - clusterRole: - rules: - - apiGroups: - - batch - resources: - - jobs - verbs: - - create - - delete - - get - - list - - watch - - apiGroups: - - batch - resources: - - jobs/status - verbs: - - get - - apiGroups: - - jobset.x-k8s.io - resources: - - jobsets - verbs: - - create - - delete - - get - - list - - watch - - apiGroups: - - jobset.x-k8s.io - resources: - - jobsets/status - verbs: - - get - - apiGroups: - - kueue.x-k8s.io - resources: - - workloads - verbs: - - create - - delete - - get - - list - - watch - - apiGroups: - - kueue.x-k8s.io - resources: - - workloads/status - verbs: - - get - - patch - - update - - apiGroups: - - kubeflow.org - resources: - - tfjobs - verbs: - - create - - delete - - get - - list - - watch - - apiGroups: - - kubeflow.org - resources: - - tfjobs/status - verbs: - - get - - apiGroups: - - kubeflow.org - resources: - - paddlejobs - verbs: - - create - - delete - - get - - list - - watch - - apiGroups: - - kubeflow.org - resources: - - paddlejobs/status - verbs: - - get - - apiGroups: - - kubeflow.org - resources: - - pytorchjobs - verbs: - - create - - delete - - get - - list - - watch - - apiGroups: - - kubeflow.org - resources: - - pytorchjobs/status - verbs: - - get - - apiGroups: - - kubeflow.org - resources: - - xgboostjobs - verbs: - - create - - delete - - get - - list - - watch - - apiGroups: - - kubeflow.org - resources: - - xgboostjobs/status - verbs: - - get - - apiGroups: - - kubeflow.org - resources: - - mpijobs - verbs: - - create - - delete - - get - - list - - watch - - apiGroups: - - kubeflow.org - resources: - - mpijobs/status - verbs: - - get - clusterRoleBinding: - subject: - kind: ServiceAccount - name: kueue-admin-CLUSTER_NAME - namespace: open-cluster-management-agent-addon diff --git a/solutions/kueue-admission-check/env/msa.yaml b/solutions/kueue-admission-check/env/msa.yaml deleted file mode 100644 index 14db974a9..000000000 --- a/solutions/kueue-admission-check/env/msa.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: authentication.open-cluster-management.io/v1beta1 -kind: ManagedServiceAccount -metadata: - name: kueue-admin-CLUSTER_NAME - namespace: CLUSTER_NAME -spec: - rotation: {} diff --git a/solutions/kueue-admission-check/env/multicluster.x-k8s.io_clusterprofiles.yaml b/solutions/kueue-admission-check/env/multicluster.x-k8s.io_clusterprofiles.yaml deleted file mode 100644 index 6ddbff0e9..000000000 --- a/solutions/kueue-admission-check/env/multicluster.x-k8s.io_clusterprofiles.yaml +++ /dev/null @@ -1,219 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.14.0 - name: clusterprofiles.multicluster.x-k8s.io -spec: - group: multicluster.x-k8s.io - names: - kind: ClusterProfile - listKind: ClusterProfileList - plural: clusterprofiles - singular: clusterprofile - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - description: ClusterProfile represents a single cluster in a 
multi-cluster - deployment. - properties: - apiVersion: - description: |- - APIVersion defines the versioned schema of this representation of an object. - Servers should convert recognized schemas to the latest internal value, and - may reject unrecognized values. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources - type: string - kind: - description: |- - Kind is a string value representing the REST resource this object represents. - Servers may infer this from the endpoint the client submits requests to. - Cannot be updated. - In CamelCase. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - metadata: - type: object - spec: - description: ClusterProfileSpec defines the desired state of ClusterProfile. - properties: - clusterManager: - description: ClusterManager defines which cluster manager owns this - ClusterProfile resource - properties: - name: - description: Name defines the name of the cluster manager - type: string - required: - - name - type: object - x-kubernetes-validations: - - message: ClusterManager is immutable - rule: self == oldSelf - displayName: - description: DisplayName defines a human-readable name of the ClusterProfile - type: string - required: - - clusterManager - type: object - status: - description: ClusterProfileStatus defines the observed state of ClusterProfile. - properties: - conditions: - description: Conditions contains the different condition statuses - for this cluster. - items: - description: "Condition contains details for one aspect of the current - state of this API Resource.\n---\nThis struct is intended for - direct use as an array at the field path .status.conditions. For - example,\n\n\n\ttype FooStatus struct{\n\t // Represents the - observations of a foo's current state.\n\t // Known .status.conditions.type - are: \"Available\", \"Progressing\", and \"Degraded\"\n\t // - +patchMergeKey=type\n\t // +patchStrategy=merge\n\t // +listType=map\n\t - \ // +listMapKey=type\n\t Conditions []metav1.Condition `json:\"conditions,omitempty\" - patchStrategy:\"merge\" patchMergeKey:\"type\" protobuf:\"bytes,1,rep,name=conditions\"`\n\n\n\t - \ // other fields\n\t}" - properties: - lastTransitionTime: - description: |- - lastTransitionTime is the last time the condition transitioned from one status to another. - This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. - format: date-time - type: string - message: - description: |- - message is a human readable message indicating details about the transition. - This may be an empty string. - maxLength: 32768 - type: string - observedGeneration: - description: |- - observedGeneration represents the .metadata.generation that the condition was set based upon. - For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date - with respect to the current state of the instance. - format: int64 - minimum: 0 - type: integer - reason: - description: |- - reason contains a programmatic identifier indicating the reason for the condition's last transition. - Producers of specific condition types may define expected values and meanings for this field, - and whether the values are considered a guaranteed API. - The value should be a CamelCase string. - This field may not be empty. 
- maxLength: 1024 - minLength: 1 - pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ - type: string - status: - description: status of the condition, one of True, False, Unknown. - enum: - - "True" - - "False" - - Unknown - type: string - type: - description: |- - type of condition in CamelCase or in foo.example.com/CamelCase. - --- - Many .condition.type values are consistent across resources like Available, but because arbitrary conditions can be - useful (see .node.status.conditions), the ability to deconflict is important. - The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) - maxLength: 316 - pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ - type: string - required: - - lastTransitionTime - - message - - reason - - status - - type - type: object - type: array - credentials: - description: |- - TokenRequests describes a list of token requests on this cluster and its - approval status. - items: - properties: - accessRef: - description: RequestRef points to a specific AuthTokenRequest - object. - properties: - kind: - description: Kind is the kind of the referred token request - object. - type: string - name: - description: Name is the name of the referred token request - object. - type: string - namespace: - description: Namespace is the namespace of the referred - token request object. - type: string - required: - - kind - - name - - namespace - type: object - consumer: - type: string - required: - - accessRef - - consumer - type: object - type: array - properties: - description: |- - Properties defines name/value pairs to represent properties of a cluster. - It could be a collection of ClusterProperty (KEP-2149) resources, - but could also be info based on other implementations. - The names of the properties can be predefined names from ClusterProperty resources - and is allowed to be customized by different cluster managers. - items: - description: |- - Property defines a name/value pair to represent a property of a cluster. - It could be a ClusterProperty (KEP-2149) resource, - but could also be info based on other implementations. - The name of the property can be predefined name from a ClusterProperty resource - and is allowed to be customized by different cluster managers. - This property can store various configurable details and metrics of a cluster, - which may include information such as the number of nodes, total and free CPU, - and total and free memory, among other potential attributes. - properties: - name: - description: |- - Name is the name of a property resource on cluster. It's a well-known - or customized name to identify the property. - maxLength: 253 - minLength: 1 - type: string - value: - description: Value is a property-dependent string - maxLength: 1024 - minLength: 1 - type: string - required: - - name - - value - type: object - type: array - version: - description: Version defines the version information of the cluster. - properties: - kubernetes: - description: Kubernetes is the kubernetes version of the cluster. 
- type: string - type: object - type: object - required: - - spec - type: object - served: true - storage: true - subresources: - status: {} diff --git a/solutions/kueue-admission-check/env/patch-clusterrole.json b/solutions/kueue-admission-check/env/patch-clusterrole.json deleted file mode 100644 index f01b1b872..000000000 --- a/solutions/kueue-admission-check/env/patch-clusterrole.json +++ /dev/null @@ -1,65 +0,0 @@ -[ - { - "op": "add", - "path": "/rules/-", - "value": { - "apiGroups": ["rbac.open-cluster-management.io"], - "resources": ["clusterpermissions"], - "verbs": ["get", "list", "watch", "create", "update", "patch", "delete"] - } - }, - { - "op": "add", - "path": "/rules/-", - "value": { - "apiGroups": ["authentication.open-cluster-management.io"], - "resources": ["managedserviceaccounts"], - "verbs": ["get", "list", "watch", "create", "update", "patch", "delete"] - } - }, - { - "op": "add", - "path": "/rules/-", - "value": { - "apiGroups": ["kueue.x-k8s.io"], - "resources": ["multikueueconfigs"], - "verbs": ["get", "list", "watch", "create", "update", "patch", "delete"] - } - }, - { - "op": "add", - "path": "/rules/-", - "value": { - "apiGroups": ["kueue.x-k8s.io"], - "resources": ["multikueueclusters"], - "verbs": ["get", "list", "watch", "create", "update", "patch", "delete"] - } - }, - { - "op": "add", - "path": "/rules/-", - "value": { - "apiGroups": ["kueue.x-k8s.io"], - "resources": ["admissionchecks"], - "verbs": ["get", "list", "watch", "create", "update", "patch", "delete"] - } - }, - { - "op": "add", - "path": "/rules/-", - "value": { - "apiGroups": ["kueue.x-k8s.io"], - "resources": ["admissionchecks/status"], - "verbs": ["update", "patch"] - } - }, - { - "op": "add", - "path": "/rules/-", - "value": { - "apiGroups": [""], - "resources": ["secrets"], - "verbs": ["get", "list", "watch", "create", "update", "patch", "delete"] - } - } -] diff --git a/solutions/kueue-admission-check/env/patch-mg-sa-cma.json b/solutions/kueue-admission-check/env/patch-mg-sa-cma.json deleted file mode 100644 index 09cfb0367..000000000 --- a/solutions/kueue-admission-check/env/patch-mg-sa-cma.json +++ /dev/null @@ -1,18 +0,0 @@ -[ - { - "op": "replace", - "path": "/spec/installStrategy", - "value": { - "placements": [ - { - "name": "placement-spoke", - "namespace": "default", - "rolloutStrategy": { - "type": "All" - } - } - ], - "type": "Placements" - } - } -] diff --git a/solutions/kueue-admission-check/env/placement.yaml b/solutions/kueue-admission-check/env/placement.yaml deleted file mode 100644 index d6bfbbea4..000000000 --- a/solutions/kueue-admission-check/env/placement.yaml +++ /dev/null @@ -1,14 +0,0 @@ -# clusteradm clusterset bind global --namespace default -apiVersion: cluster.open-cluster-management.io/v1beta1 -kind: Placement -metadata: - name: placement-spoke - namespace: default -spec: - clusterSets: - - spoke - tolerations: - - key: cluster.open-cluster-management.io/unreachable - operator: Exists - - key: cluster.open-cluster-management.io/unavailable - operator: Exists diff --git a/solutions/kueue-admission-check/env/single-clusterqueue-setup-mwrs.yaml b/solutions/kueue-admission-check/env/single-clusterqueue-setup-mwrs.yaml deleted file mode 100644 index de636fca9..000000000 --- a/solutions/kueue-admission-check/env/single-clusterqueue-setup-mwrs.yaml +++ /dev/null @@ -1,90 +0,0 @@ -apiVersion: work.open-cluster-management.io/v1alpha1 -kind: ManifestWorkReplicaSet -metadata: - name: single-clusterqueue - namespace: default -spec: - placementRefs: - - name: 
placement-spoke - manifestWorkTemplate: - workload: - manifests: - - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRoleBinding - metadata: - name: kueue-manager-ocm-rolebinding - roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: kueue-manager-role - subjects: - - kind: ServiceAccount - name: klusterlet-work-sa - namespace: open-cluster-management-agent - - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRoleBinding - metadata: - name: kueue-batch-admin-ocm-rolebinding - roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: kueue-batch-admin-role - subjects: - - kind: ServiceAccount - name: klusterlet-work-sa - namespace: open-cluster-management-agent - - apiVersion: kueue.x-k8s.io/v1beta1 - kind: ResourceFlavor - metadata: - name: "default-flavor-demo1" - - apiVersion: kueue.x-k8s.io/v1beta1 - kind: ClusterQueue - metadata: - name: "cluster-queue-demo1" - spec: - namespaceSelector: {} # match all. - resourceGroups: - - coveredResources: ["cpu", "memory"] - flavors: - - name: "default-flavor-demo1" - resources: - - name: "cpu" - nominalQuota: 9 - - name: "memory" - nominalQuota: 36Gi - - apiVersion: kueue.x-k8s.io/v1beta1 - kind: LocalQueue - metadata: - namespace: "default" - name: "user-queue-demo1" - spec: - clusterQueue: "cluster-queue-demo1" - - apiVersion: kueue.x-k8s.io/v1beta1 - kind: ResourceFlavor - metadata: - name: "default-flavor-demo2" - - apiVersion: kueue.x-k8s.io/v1beta1 - kind: ClusterQueue - metadata: - name: "cluster-queue-demo2" - spec: - namespaceSelector: {} # match all. - resourceGroups: - - coveredResources: ["cpu", "memory","nvidia.com/gpu"] - flavors: - - name: "default-flavor-demo2" - resources: - - name: "cpu" - nominalQuota: 9 - - name: "memory" - nominalQuota: 36Gi - - name: "nvidia.com/gpu" - nominalQuota: 3 - - apiVersion: kueue.x-k8s.io/v1beta1 - kind: LocalQueue - metadata: - namespace: "default" - name: "user-queue-demo2" - spec: - clusterQueue: "cluster-queue-demo2" - diff --git a/solutions/kueue-admission-check/job-demo1.yaml b/solutions/kueue-admission-check/job-demo1.yaml index e68cda738..1aca99c4f 100644 --- a/solutions/kueue-admission-check/job-demo1.yaml +++ b/solutions/kueue-admission-check/job-demo1.yaml @@ -4,7 +4,7 @@ metadata: generateName: demo1-job namespace: default labels: - kueue.x-k8s.io/queue-name: user-queue-demo1 + kueue.x-k8s.io/queue-name: "user-queue" spec: parallelism: 1 completions: 1 diff --git a/solutions/kueue-admission-check/job-demo2.yaml b/solutions/kueue-admission-check/job-demo2.yaml index 7b4aa845a..40eb03ddd 100644 --- a/solutions/kueue-admission-check/job-demo2.yaml +++ b/solutions/kueue-admission-check/job-demo2.yaml @@ -4,7 +4,7 @@ metadata: generateName: demo2-job namespace: default labels: - kueue.x-k8s.io/queue-name: "user-queue-demo2" + kueue.x-k8s.io/queue-name: "user-queue" spec: parallelism: 1 completions: 1 diff --git a/solutions/kueue-admission-check/multikueue-setup-demo1.yaml b/solutions/kueue-admission-check/multikueue-setup-demo1.yaml index 11a1b4d7b..96ac0cb9e 100644 --- a/solutions/kueue-admission-check/multikueue-setup-demo1.yaml +++ b/solutions/kueue-admission-check/multikueue-setup-demo1.yaml @@ -1,23 +1,25 @@ apiVersion: kueue.x-k8s.io/v1beta1 kind: ResourceFlavor metadata: - name: "default-flavor-demo1" + name: "default-flavor" --- apiVersion: kueue.x-k8s.io/v1beta1 kind: ClusterQueue metadata: - name: "cluster-queue-demo1" + name: "cluster-queue" spec: namespaceSelector: {} # match all. 
   resourceGroups:
-    - coveredResources: ["cpu", "memory"]
+    - coveredResources: ["cpu", "memory","nvidia.com/gpu"]
       flavors:
-        - name: "default-flavor-demo1"
+        - name: "default-flavor"
           resources:
             - name: "cpu"
               nominalQuota: 9
             - name: "memory"
               nominalQuota: 36Gi
+            - name: "nvidia.com/gpu"
+              nominalQuota: 3
   admissionChecks:
     - multikueue-demo1
 ---
@@ -25,9 +27,9 @@ apiVersion: kueue.x-k8s.io/v1beta1
 kind: LocalQueue
 metadata:
   namespace: "default"
-  name: "user-queue-demo1"
+  name: "user-queue"
 spec:
-  clusterQueue: "cluster-queue-demo1"
+  clusterQueue: "cluster-queue"
 ---
 apiVersion: kueue.x-k8s.io/v1beta1
 kind: AdmissionCheck
@@ -46,26 +48,26 @@ metadata:
   name: multikueue-config-demo1
 spec:
   clusters:
-  - multikueue-demo1-cluster1
-  - multikueue-demo1-cluster2
+  - multikueue-config-demo1-cluster1
+  - multikueue-config-demo1-cluster2
 ---
 apiVersion: kueue.x-k8s.io/v1beta1
 kind: MultiKueueCluster
 metadata:
-  name: multikueue-demo1-cluster1
+  name: multikueue-config-demo1-cluster1
 spec:
   kubeConfig:
     locationType: Secret
-    location: kueue-admin-cluster1-kubeconfig
-    # a secret called "kueue-admin-cluster1-kubeconfig" should be created in the namespace the kueue
+    location: multikueue-cluster1
+    # a secret called "multikueue-cluster1" should be created in the namespace the kueue
     # controller manager runs in, holding the kubeConfig needed to connect to the
     # worker cluster in the "kubeconfig" key;
 ---
 apiVersion: kueue.x-k8s.io/v1beta1
 kind: MultiKueueCluster
 metadata:
-  name: multikueue-demo1-cluster2
+  name: multikueue-config-demo1-cluster2
 spec:
   kubeConfig:
     locationType: Secret
-    location: kueue-admin-cluster2-kubeconfig
+    location: multikueue-cluster2
diff --git a/solutions/kueue-admission-check/multikueue-setup-demo2.yaml b/solutions/kueue-admission-check/multikueue-setup-demo2.yaml
index ae4a2e525..4571430d6 100644
--- a/solutions/kueue-admission-check/multikueue-setup-demo2.yaml
+++ b/solutions/kueue-admission-check/multikueue-setup-demo2.yaml
@@ -1,18 +1,18 @@
 apiVersion: kueue.x-k8s.io/v1beta1
 kind: ResourceFlavor
 metadata:
-  name: "default-flavor-demo2"
+  name: "default-flavor"
 ---
 apiVersion: kueue.x-k8s.io/v1beta1
 kind: ClusterQueue
 metadata:
-  name: "cluster-queue-demo2"
+  name: "cluster-queue"
 spec:
   namespaceSelector: {} # match all.
   resourceGroups:
     - coveredResources: ["cpu", "memory","nvidia.com/gpu"]
       flavors:
-        - name: "default-flavor-demo2"
+        - name: "default-flavor"
           resources:
             - name: "cpu"
               nominalQuota: 9
@@ -22,15 +22,15 @@ spec:
               nominalQuota: 3
   admissionChecks:
     - multikueue-demo2
-    - placement-demo2
+    - multikueue-config-demo2
 ---
 apiVersion: kueue.x-k8s.io/v1beta1
 kind: LocalQueue
 metadata:
   namespace: "default"
-  name: "user-queue-demo2"
+  name: "user-queue"
 spec:
-  clusterQueue: "cluster-queue-demo2"
+  clusterQueue: "cluster-queue"
 ---
 apiVersion: kueue.x-k8s.io/v1beta1
 kind: AdmissionCheck
@@ -41,17 +41,17 @@ spec:
   parameters:
     apiGroup: kueue.x-k8s.io
     kind: MultiKueueConfig
-    name: placement-demo2
+    name: multikueue-config-demo2
 ---
 # OCM implements an admissioncheck controller to automate the MultiKueue setup process.
 # MultiKueueConfigs and MultiKueueClusters are generated dynamically based on OCM placement decisions.
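+# The placement-backed AdmissionCheck below names its Placement in `parameters`;
+# the OCM controller resolves that Placement's decisions into the MultiKueueCluster
+# and MultiKueueConfig objects consumed by the `multikueue-demo2` check above.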
apiVersion: kueue.x-k8s.io/v1beta1 kind: AdmissionCheck metadata: - name: placement-demo2 + name: multikueue-config-demo2 spec: controllerName: open-cluster-management.io/placement parameters: apiGroup: cluster.open-cluster-management.io kind: Placement - name: placement-demo2 + name: multikueue-config-demo2 diff --git a/solutions/kueue-admission-check/placement-demo2-1.yaml b/solutions/kueue-admission-check/placement-demo2-1.yaml index 8d58c64a4..a0b239867 100644 --- a/solutions/kueue-admission-check/placement-demo2-1.yaml +++ b/solutions/kueue-admission-check/placement-demo2-1.yaml @@ -1,11 +1,11 @@ apiVersion: cluster.open-cluster-management.io/v1beta1 kind: Placement metadata: - name: placement-demo2 + name: multikueue-config-demo2 namespace: kueue-system spec: clusterSets: - - spoke + - global tolerations: - key: cluster.open-cluster-management.io/unreachable operator: Exists diff --git a/solutions/kueue-admission-check/placement-demo2-2.yaml b/solutions/kueue-admission-check/placement-demo2-2.yaml index 4934613b2..45f6ef2aa 100644 --- a/solutions/kueue-admission-check/placement-demo2-2.yaml +++ b/solutions/kueue-admission-check/placement-demo2-2.yaml @@ -1,11 +1,11 @@ apiVersion: cluster.open-cluster-management.io/v1beta1 kind: Placement metadata: - name: placement-demo2 + name: multikueue-config-demo2 namespace: kueue-system spec: clusterSets: - - spoke + - global tolerations: - key: cluster.open-cluster-management.io/unreachable operator: Exists diff --git a/solutions/kueue-admission-check/setup-env.sh b/solutions/kueue-admission-check/setup-env.sh index 74f65991a..f3fe31b7f 100755 --- a/solutions/kueue-admission-check/setup-env.sh +++ b/solutions/kueue-admission-check/setup-env.sh @@ -2,12 +2,28 @@ cd $(dirname ${BASH_SOURCE}) -set -e +set -euo pipefail -hub=${CLUSTER1:-hub} +# Parse command line arguments +FORCE=false +while [[ $# -gt 0 ]]; do + case $1 in + --force) + FORCE=true + shift + ;; + *) + echo "Unknown option: $1" + echo "Usage: $0 [--force]" + exit 1 + ;; + esac +done + +hub=${HUB:-local-cluster} c1=${CLUSTER1:-cluster1} c2=${CLUSTER2:-cluster2} -c3=${CLUSTER2:-cluster3} +c3=${CLUSTER3:-cluster3} hubctx="kind-${hub}" c1ctx="kind-${c1}" @@ -24,113 +40,132 @@ jobset_manifest="https://github.com/kubernetes-sigs/jobset/releases/download/v0. mpi_operator_manifest="https://github.com/kubeflow/mpi-operator/releases/download/v0.6.0/mpi-operator.yaml" training_operator_kustomize="github.com/kubeflow/training-operator.git/manifests/overlays/standalone?ref=v1.8.1" -# ocm setup -echo "Parepare kind clusters" -for cluster in "${all_clusters[@]}"; do - kind create cluster --name "$cluster" --image kindest/node:v1.29.0 -done +# Function to create kind clusters +create_clusters() { + if [[ "$FORCE" == "true" ]]; then + echo "Deleting existing clusters due to --force flag..." 
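+        # Note: `kind delete cluster` also removes the cluster's kubeconfig
+        # context, so the re-creation below starts from a clean slate.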
+        for cluster in "${all_clusters[@]}"; do
+            kind delete cluster --name "$cluster" || true
+        done
+    fi
 
-echo "Initialize the ocm hub cluster with ClusterProfile enabled"
-clusteradm init --feature-gates="ManifestWorkReplicaSet=true,ManagedClusterAutoApproval=true,ClusterProfile=true" --bundle-version="v0.15.0" --wait --context ${hubctx}
-joincmd=$(clusteradm get token --context ${hubctx} | grep clusteradm)
+    echo "Prepare kind clusters"
+    for cluster in "${all_clusters[@]}"; do
+        kind create cluster --name "$cluster" --image kindest/node:v1.29.0 || true
+    done
+}
 
-echo "Join clusters to hub"
-$(echo ${joincmd} --force-internal-endpoint-lookup --wait --context ${c1ctx} | sed "s/<cluster_name>/$c1/g")
-$(echo ${joincmd} --force-internal-endpoint-lookup --wait --context ${c2ctx} | sed "s/<cluster_name>/$c2/g")
-$(echo ${joincmd} --force-internal-endpoint-lookup --wait --context ${c3ctx} | sed "s/<cluster_name>/$c3/g")
+# Function to setup OCM
+setup_ocm() {
+    echo "Initialize the ocm hub cluster"
+    clusteradm init --wait --context ${hubctx}
+    joincmd=$(clusteradm get token --context ${hubctx} | grep clusteradm)
 
-echo "Accept join of clusters"
-clusteradm accept --context ${hubctx} --clusters ${c1},${c2},${c3} --wait
+    echo "Join clusters to hub"
+    eval "${joincmd//<cluster_name>/${hub}} --force-internal-endpoint-lookup --wait --context ${hubctx}"
+    eval "${joincmd//<cluster_name>/${c1}} --force-internal-endpoint-lookup --wait --context ${c1ctx}"
+    eval "${joincmd//<cluster_name>/${c2}} --force-internal-endpoint-lookup --wait --context ${c2ctx}"
+    eval "${joincmd//<cluster_name>/${c3}} --force-internal-endpoint-lookup --wait --context ${c3ctx}"
 
-kubectl get managedclusters --all-namespaces --context ${hubctx}
+    echo "Accept join of clusters"
+    clusteradm accept --context ${hubctx} --clusters ${hub},${c1},${c2},${c3} --wait
 
-# install kueue, jobset, workflow
-for ctx in "${all_ctx[@]}"; do
-    echo "Install Kueue, Jobset on $ctx"
-    kubectl apply --server-side -f "$kueue_manifest" --context "$ctx"
-    echo "waiting for kueue-system pods to be ready"
-    kubectl wait --for=condition=Ready pods --all -n kueue-system --timeout=300s --context "$ctx"
-    kubectl apply --server-side -f "$jobset_manifest" --context "$ctx"
-done
+    # label local-cluster
+    kubectl label managedclusters ${hub} local-cluster=true --context ${hubctx}
+    kubectl get managedclusters --all-namespaces --context ${hubctx}
+}
 
-for ctx in "${spoke_ctx[@]}"; do
-    echo "Install Kubeflow MPI Operator, Training Operator on $ctx"
-    kubectl apply --server-side -f "$mpi_operator_manifest" --context "$ctx" || true
-    kubectl apply --server-side -k "$training_operator_kustomize" --context "$ctx" || true
-done
+# Function to install Kueue, jobset, workflow
+install_kueue() {
+    for ctx in "${all_ctx[@]}"; do
+        echo "Install Kueue, Jobset on $ctx"
+        kubectl apply --server-side -f "$kueue_manifest" --context "$ctx"
+        echo "waiting for kueue-system pods to be ready"
+        kubectl wait --for=condition=Ready pods --all -n kueue-system --timeout=300s --context "$ctx"
+        kubectl apply --server-side -f "$jobset_manifest" --context "$ctx"
+    done
 
-kubectl config use-context ${hubctx}
-# patch some ocm resoures and images
-echo "Patch permission"
-kubectl patch clusterrole cluster-manager --type='json' -p "$(cat env/patch-clusterrole.json)"
+    for ctx in "${spoke_ctx[@]}"; do
+        echo "Install Kubeflow MPI Operator, Training Operator on $ctx"
+        kubectl apply --server-side -f "$mpi_operator_manifest" --context "$ctx" || true
+        kubectl apply --server-side -k "$training_operator_kustomize" --context "$ctx" || true
+    done
+}
 
-echo "Patch image"
-# 
quay.io/haoqing/registration-operator:kueue-v0.9.1 grants more permission for registration and placement. -# quay.io/haoqing/registration-operator:kueue-v0.9.1 creates worker’s kubeconfig secret for multikueue. -# quay.io/haoqing/placement:kueue-v0.9.1 implements the admission check controller. -# The source code is in repo https://github.com/haoqing0110/OCM/tree/br_ocm-v0.15.1-kueue-v0.9.1. -kubectl patch deployment cluster-manager -n open-cluster-management --type=json -p='[ - {"op": "replace", "path": "/spec/template/spec/containers/0/image", "value": "quay.io/haoqing/registration-operator:kueue-v0.9.1"}, - {"op": "replace", "path": "/spec/template/spec/containers/0/imagePullPolicy", "value": "Always"} -]' -kubectl patch clustermanager cluster-manager --type=json -p='[ - {"op": "replace", "path": "/spec/registrationImagePullSpec", "value": "quay.io/haoqing/registration:kueue-v0.9.1"}, - {"op": "replace", "path": "/spec/placementImagePullSpec", "value": "quay.io/haoqing/placement:kueue-v0.9.1"} -]' +# Function to install OCM addons +install_ocm_addons() { + kubectl config use-context ${hubctx} -# install addons -echo "Install managed-serviceaccount" -helm repo add ocm https://open-cluster-management.io/helm-charts/ -helm repo update -helm uninstall -n open-cluster-management-addon managed-serviceaccount || true -helm install \ - -n open-cluster-management-addon --create-namespace \ - managed-serviceaccount ocm/managed-serviceaccount \ - --set tag=latest \ - --set featureGates.ephemeralIdentity=true \ - --set enableAddOnDeploymentConfig=true \ - --set hubDeployMode=AddOnTemplate + echo "Add ocm helm repo" + helm repo add ocm https://open-cluster-management.io/helm-charts/ + helm repo update -echo "Install managed-serviceaccount mca" -clusteradm create clusterset spoke -clusteradm clusterset set spoke --clusters ${c1},${c2},${c3} -clusteradm clusterset bind spoke --namespace default -kubectl apply -f env/placement.yaml || true -kubectl patch clustermanagementaddon managed-serviceaccount --type='json' -p="$(cat env/patch-mg-sa-cma.json)" || true + echo "Install managed-serviceaccount" + helm upgrade --install \ + -n open-cluster-management-addon --create-namespace \ + managed-serviceaccount ocm/managed-serviceaccount \ + --set featureGates.ephemeralIdentity=true \ + --set enableAddOnDeploymentConfig=true \ + --set hubDeployMode=AddOnTemplate -echo "Install cluster-permission" -git clone git@github.com:open-cluster-management-io/cluster-permission.git || true -cd cluster-permission -helm uninstall -n open-cluster-management cluster-permission || true -helm install cluster-permission chart/ \ - --namespace open-cluster-management \ - --create-namespace \ - --set global.imageOverrides.cluster_permission=quay.io/open-cluster-management/cluster-permission:latest \ - --set global.pullPolicy=Always -cd - -rm -rf cluster-permission + echo "Install cluster-permission" + helm upgrade --install \ + -n open-cluster-management --create-namespace \ + cluster-permission ocm/cluster-permission \ + --set global.imageOverrides.cluster_permission=quay.io/open-cluster-management/cluster-permission:latest -echo "Install resource-usage-collect-addon" -git clone git@github.com:open-cluster-management-io/addon-contrib.git || true -cd addon-contrib/resource-usage-collect-addon -make deploy -cd - -rm -rf addon-contrib + echo "Install kueue-addon" + helm upgrade --install \ + -n open-cluster-management-addon --create-namespace \ + kueue-addon ocm/kueue-addon \ + --set skipClusterSetBinding=true -# prepare credentials 
for multikueue -echo "Setup queue on the spoke" -kubectl apply -f env/single-clusterqueue-setup-mwrs.yaml + echo "Install resource-usage-collect-addon" + git clone https://github.com/open-cluster-management-io/addon-contrib.git || true + cd addon-contrib/resource-usage-collect-addon + helm install resource-usage-collect-addon chart/ \ + -n open-cluster-management-addon --create-namespace \ + --set skipClusterSetBinding=true \ + --set global.image.repository=quay.io/haoqing/resource-usage-collect-addon + cd - -echo "Setup credentials for clusterprofile" -for CLUSTER in "${spoke_clusters[@]}"; do - sed "s/CLUSTER_NAME/$CLUSTER/g" env/clusterpermission.yaml | kubectl apply -f - - sed "s/CLUSTER_NAME/$CLUSTER/g" env/msa.yaml | kubectl apply -f - -done + rm -rf addon-contrib +} -echo "Setup faked GPU on the spoke" -kubectl label managedcluster cluster2 accelerator=nvidia-tesla-t4 -kubectl label managedcluster cluster3 accelerator=nvidia-tesla-t4 +# Function to setup fake GPU +setup_fake_gpu() { + echo "Setup fake GPU on the spoke clusters" + kubectl label managedcluster cluster2 accelerator=nvidia-tesla-t4 --context ${hubctx} + kubectl label managedcluster cluster3 accelerator=nvidia-tesla-t4 --context ${hubctx} -echo "IMPORTANT: RUN BELOW COMMAND MANUALLY on cluster2 and cluster3 !!!" -echo "kubectl edit-status node cluster2-control-plane --context ${c2ctx}" with nvidia.com/gpu: "3" -echo "kubectl edit-status node cluster3-control-plane --context ${c3ctx}" with nvidia.com/gpu: "3" + kubectl patch node cluster2-control-plane --subresource=status --type='merge' --patch='{ + "status": { + "capacity": { + "nvidia.com/gpu": "3" + }, + "allocatable": { + "nvidia.com/gpu": "3" + } + } + }' --context ${c2ctx} + + kubectl patch node cluster3-control-plane --subresource=status --type='merge' --patch='{ + "status": { + "capacity": { + "nvidia.com/gpu": "3" + }, + "allocatable": { + "nvidia.com/gpu": "3" + } + } + }' --context ${c3ctx} + + echo "Fake GPU resources added successfully to cluster2 and cluster3!" +} + +# Main execution +create_clusters +setup_ocm +install_kueue +install_ocm_addons +setup_fake_gpu \ No newline at end of file
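
As a usage note, the script's own argument parsing supports a full rebuild:

```bash
# Tear down any leftover kind clusters and set the environment up from scratch.
./setup-env.sh --force
```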