From f6dec25bdf9ef0e492c85b2c1a65aea62627bddc Mon Sep 17 00:00:00 2001
From: Yang Le
Date: Tue, 27 Jan 2026 21:06:21 +0800
Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20add=20controller=20to=20support=20t?=
 =?UTF-8?q?oken=20infrastructure=20(#1340)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Yang Le
---
 .../templates/cluster_role.yaml               |   8 +-
 .../config/rbac/cluster_role.yaml             |   8 +-
 .../hub/addon-manager/clusterrole.yaml        |  14 +-
 .../hub/addon-manager/token-role.yaml         |  17 +
 .../hub/addon-manager/token-rolebinding.yaml  |  19 +
 .../addon-manager/token-serviceaccount.yaml   |   8 +
 .../hub/grpc-server/clusterrole.yaml          |   3 +
 .../hub/registration/clusterrole.yaml         |   2 +-
 .../controllers/addontokeninfra/controller.go |  430 ++++++++++
 .../addontokeninfra/controller_test.go        |  796 ++++++++++++++++++
 pkg/addon/manager.go                          |   27 +
 .../addon/token_infrastructure_test.go        |  320 +++++++
 12 files changed, 1644 insertions(+), 8 deletions(-)
 create mode 100644 manifests/cluster-manager/hub/addon-manager/token-role.yaml
 create mode 100644 manifests/cluster-manager/hub/addon-manager/token-rolebinding.yaml
 create mode 100644 manifests/cluster-manager/hub/addon-manager/token-serviceaccount.yaml
 create mode 100644 pkg/addon/controllers/addontokeninfra/controller.go
 create mode 100644 pkg/addon/controllers/addontokeninfra/controller_test.go
 create mode 100644 test/integration/addon/token_infrastructure_test.go

diff --git a/deploy/cluster-manager/chart/cluster-manager/templates/cluster_role.yaml b/deploy/cluster-manager/chart/cluster-manager/templates/cluster_role.yaml
index a72388e81..297f8f07c 100644
--- a/deploy/cluster-manager/chart/cluster-manager/templates/cluster_role.yaml
+++ b/deploy/cluster-manager/chart/cluster-manager/templates/cluster_role.yaml
@@ -9,9 +9,11 @@ rules:
   verbs: ["create", "get", "list", "update", "watch", "patch", "delete", "deletecollection"]
 - apiGroups: [""]
   resources: ["serviceaccounts/token"]
-  resourceNames:
-  - "agent-registration-bootstrap"
-  verbs: ["get", "create"]
+  verbs: ["create"]
+# Allow gRPC client to connect to the gRPC server on hub
+- apiGroups: [""]
+  resources: ["serviceaccounts/token"]
+  verbs: ["subscribe"]
 - apiGroups: [""]
   resources: ["pods"]
   verbs: ["get"]
diff --git a/deploy/cluster-manager/config/rbac/cluster_role.yaml b/deploy/cluster-manager/config/rbac/cluster_role.yaml
index 728d9a67f..8f2026691 100644
--- a/deploy/cluster-manager/config/rbac/cluster_role.yaml
+++ b/deploy/cluster-manager/config/rbac/cluster_role.yaml
@@ -11,9 +11,11 @@ rules:
   verbs: ["create", "get", "list", "update", "watch", "patch", "delete", "deletecollection"]
 - apiGroups: [""]
   resources: ["serviceaccounts/token"]
-  resourceNames:
-  - "agent-registration-bootstrap"
-  verbs: ["get", "create"]
+  verbs: ["create"]
+# Allow gRPC client to connect to the gRPC server on hub
+- apiGroups: [""]
+  resources: ["serviceaccounts/token"]
+  verbs: ["subscribe"]
 - apiGroups: [""]
   resources: ["pods"]
   verbs: ["get"]
diff --git a/manifests/cluster-manager/hub/addon-manager/clusterrole.yaml b/manifests/cluster-manager/hub/addon-manager/clusterrole.yaml
index aec895337..10782adaf 100644
--- a/manifests/cluster-manager/hub/addon-manager/clusterrole.yaml
+++ b/manifests/cluster-manager/hub/addon-manager/clusterrole.yaml
@@ -66,5 +66,17 @@ rules:
   resources: ["signers"]
   verbs: ["approve", "sign"]
 - apiGroups: ["rbac.authorization.k8s.io"]
-  resources: ["rolebindings"]
+  resources: ["rolebindings", "roles"]
   verbs: ["get", "list", "watch", "create", "update",
"delete"] +# Allow controller to manage serviceaccounts for token-based authentication +- apiGroups: [""] + resources: ["serviceaccounts"] + verbs: ["get", "list", "watch", "create", "update", "delete"] +# Allow controller to create RoleBindings that grant serviceaccounts/token permission +- apiGroups: [""] + resources: ["serviceaccounts/token"] + verbs: ["create"] +# Allow gRPC client to connect to the gRPC server on hub +- apiGroups: [""] + resources: ["serviceaccounts/token"] + verbs: ["subscribe"] diff --git a/manifests/cluster-manager/hub/addon-manager/token-role.yaml b/manifests/cluster-manager/hub/addon-manager/token-role.yaml new file mode 100644 index 000000000..3bbdfd07b --- /dev/null +++ b/manifests/cluster-manager/hub/addon-manager/token-role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: "{{ .AddonName }}-token-role" + namespace: "{{ .ClusterName }}" + labels: + addon.open-cluster-management.io/name: "{{ .AddonName }}" + addon.open-cluster-management.io/token-infrastructure: "true" +rules: +- apiGroups: [""] + resources: ["serviceaccounts/token"] + resourceNames: ["{{ .AddonName }}-agent"] + verbs: ["create"] +# Allow gRPC client to connect to the gRPC server on hub +- apiGroups: [""] + resources: ["serviceaccounts/token"] + verbs: ["subscribe"] diff --git a/manifests/cluster-manager/hub/addon-manager/token-rolebinding.yaml b/manifests/cluster-manager/hub/addon-manager/token-rolebinding.yaml new file mode 100644 index 000000000..10ea99dc7 --- /dev/null +++ b/manifests/cluster-manager/hub/addon-manager/token-rolebinding.yaml @@ -0,0 +1,19 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: "{{ .AddonName }}-token-role" + namespace: "{{ .ClusterName }}" + labels: + addon.open-cluster-management.io/name: "{{ .AddonName }}" + addon.open-cluster-management.io/token-infrastructure: "true" +subjects: +- kind: Group + name: "system:open-cluster-management:{{ .ClusterName }}" + apiGroup: rbac.authorization.k8s.io +- kind: Group + name: "open-cluster-management:{{ .ClusterName }}" + apiGroup: rbac.authorization.k8s.io +roleRef: + kind: Role + name: "{{ .AddonName }}-token-role" + apiGroup: rbac.authorization.k8s.io diff --git a/manifests/cluster-manager/hub/addon-manager/token-serviceaccount.yaml b/manifests/cluster-manager/hub/addon-manager/token-serviceaccount.yaml new file mode 100644 index 000000000..c6687b9ae --- /dev/null +++ b/manifests/cluster-manager/hub/addon-manager/token-serviceaccount.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: "{{ .AddonName }}-agent" + namespace: "{{ .ClusterName }}" + labels: + addon.open-cluster-management.io/name: "{{ .AddonName }}" + addon.open-cluster-management.io/token-infrastructure: "true" diff --git a/manifests/cluster-manager/hub/grpc-server/clusterrole.yaml b/manifests/cluster-manager/hub/grpc-server/clusterrole.yaml index 819ade261..14a697c0b 100644 --- a/manifests/cluster-manager/hub/grpc-server/clusterrole.yaml +++ b/manifests/cluster-manager/hub/grpc-server/clusterrole.yaml @@ -51,3 +51,6 @@ rules: - apiGroups: ["authentication.k8s.io"] resources: ["tokenreviews"] verbs: ["create"] +- apiGroups: [""] + resources: ["serviceaccounts/token"] + verbs: ["create"] diff --git a/manifests/cluster-manager/hub/registration/clusterrole.yaml b/manifests/cluster-manager/hub/registration/clusterrole.yaml index 7f9729442..4c27b426d 100644 --- a/manifests/cluster-manager/hub/registration/clusterrole.yaml +++ 
b/manifests/cluster-manager/hub/registration/clusterrole.yaml @@ -24,7 +24,7 @@ rules: resources: ["serviceaccounts/token"] resourceNames: - "agent-registration-bootstrap" - verbs: ["get", "create"] + verbs: ["create"] - apiGroups: [""] resources: ["pods"] verbs: ["get"] diff --git a/pkg/addon/controllers/addontokeninfra/controller.go b/pkg/addon/controllers/addontokeninfra/controller.go new file mode 100644 index 000000000..b3cfc1aed --- /dev/null +++ b/pkg/addon/controllers/addontokeninfra/controller.go @@ -0,0 +1,430 @@ +package addontokeninfra + +import ( + "context" + "fmt" + + "github.com/openshift/library-go/pkg/assets" + "github.com/openshift/library-go/pkg/operator/resource/resourceapply" + certificatesv1 "k8s.io/api/certificates/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilerrors "k8s.io/apimachinery/pkg/util/errors" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + coreinformers "k8s.io/client-go/informers/core/v1" + rbacinformers "k8s.io/client-go/informers/rbac/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/cache" + "k8s.io/klog/v2" + + addonapiv1alpha1 "open-cluster-management.io/api/addon/v1alpha1" + addonv1alpha1client "open-cluster-management.io/api/client/addon/clientset/versioned" + addoninformerv1alpha1 "open-cluster-management.io/api/client/addon/informers/externalversions/addon/v1alpha1" + addonlisterv1alpha1 "open-cluster-management.io/api/client/addon/listers/addon/v1alpha1" + "open-cluster-management.io/sdk-go/pkg/basecontroller/events" + "open-cluster-management.io/sdk-go/pkg/basecontroller/factory" + "open-cluster-management.io/sdk-go/pkg/patcher" + + "open-cluster-management.io/ocm/manifests" + "open-cluster-management.io/ocm/pkg/common/queue" + commonrecorder "open-cluster-management.io/ocm/pkg/common/recorder" +) + +var ( + tokenInfraManifests = []string{ + "cluster-manager/hub/addon-manager/token-serviceaccount.yaml", + "cluster-manager/hub/addon-manager/token-role.yaml", + "cluster-manager/hub/addon-manager/token-rolebinding.yaml", + } +) + +const ( + // TokenInfrastructureReadyCondition is the condition type indicating token infrastructure is ready + TokenInfrastructureReadyCondition = "TokenInfrastructureReady" +) + +// TokenInfraConfig holds configuration for rendering token infrastructure manifests +type TokenInfraConfig struct { + ClusterName string + AddonName string +} + +// tokenInfrastructureController reconciles ManagedClusterAddOn resources +// to create token-based authentication infrastructure +type tokenInfrastructureController struct { + kubeClient kubernetes.Interface + addonClient addonv1alpha1client.Interface + addonLister addonlisterv1alpha1.ManagedClusterAddOnLister + cache resourceapply.ResourceCache + recorder events.Recorder +} + +// usesTokenAuth checks if the addon uses token-based authentication +// by checking if it has kubeClient registration and kubeClientDriver is "token" +func usesTokenAuth(addon *addonapiv1alpha1.ManagedClusterAddOn) bool { + // First check if addon has kubeClient registration + hasKubeClient := false + for _, reg := range addon.Status.Registrations { + if reg.SignerName == certificatesv1.KubeAPIServerClientSignerName { + hasKubeClient = true + break + } + } + + if !hasKubeClient { + return false + } + + // Then check if kubeClientDriver is "token" + return addon.Status.KubeClientDriver == "token" +} + +// addonFilter filters addons that 
use token-based kubeClient registration +// or have TokenInfrastructureReady condition (for cleanup) +func addonFilter(obj interface{}) bool { + addon, ok := obj.(*addonapiv1alpha1.ManagedClusterAddOn) + if !ok { + return false + } + + // Check if addon uses token authentication + if usesTokenAuth(addon) { + return true + } + + // Check if addon has TokenInfrastructureReady condition (needs cleanup) + if meta.FindStatusCondition(addon.Status.Conditions, TokenInfrastructureReadyCondition) != nil { + return true + } + + return false +} + +// tokenInfraResourceToAddonKey extracts the addon namespace/name from token infrastructure resource labels +// Returns empty string if resource doesn't have the required labels +func tokenInfraResourceToAddonKey(obj runtime.Object) string { + metaObj, err := meta.Accessor(obj) + if err != nil { + return "" + } + + labels := metaObj.GetLabels() + if labels == nil { + return "" + } + + // Only process resources with token-infrastructure label + if labels["addon.open-cluster-management.io/token-infrastructure"] != "true" { + return "" + } + + // Extract addon name from label + addonName := labels["addon.open-cluster-management.io/name"] + if addonName == "" { + return "" + } + + // Namespace is the cluster name + namespace := metaObj.GetNamespace() + if namespace == "" { + return "" + } + + // Return namespace/addonName as queue key + return namespace + "/" + addonName +} + +// newTokenInfraEventHandler creates an event handler for token infrastructure resources +// that enqueues the associated addon for reconciliation on Update and Delete events +func newTokenInfraEventHandler(syncCtx factory.SyncContext, queueKeyFn func(runtime.Object) string) cache.ResourceEventHandler { + return &cache.ResourceEventHandlerFuncs{ + UpdateFunc: func(old, new interface{}) { + newObj, ok := new.(runtime.Object) + if !ok { + utilruntime.HandleError(fmt.Errorf("error decoding object, invalid type")) + return + } + if key := queueKeyFn(newObj); key != "" { + syncCtx.Queue().Add(key) + } + }, + DeleteFunc: func(obj interface{}) { + var runtimeObj runtime.Object + var ok bool + + if tombstone, isTombstone := obj.(cache.DeletedFinalStateUnknown); isTombstone { + runtimeObj, ok = tombstone.Obj.(runtime.Object) + if !ok { + utilruntime.HandleError(fmt.Errorf("error decoding object tombstone, invalid type")) + return + } + } else { + runtimeObj, ok = obj.(runtime.Object) + if !ok { + utilruntime.HandleError(fmt.Errorf("error decoding object, invalid type")) + return + } + } + + if key := queueKeyFn(runtimeObj); key != "" { + syncCtx.Queue().Add(key) + } + }, + } +} + +func NewTokenInfrastructureController( + kubeClient kubernetes.Interface, + addonClient addonv1alpha1client.Interface, + addonInformers addoninformerv1alpha1.ManagedClusterAddOnInformer, + serviceAccountInformer coreinformers.ServiceAccountInformer, + roleInformer rbacinformers.RoleInformer, + roleBindingInformer rbacinformers.RoleBindingInformer, +) factory.Controller { + c := &tokenInfrastructureController{ + kubeClient: kubeClient, + addonClient: addonClient, + addonLister: addonInformers.Lister(), + cache: resourceapply.NewResourceCache(), + recorder: events.NewContextualLoggingEventRecorder("addon-token-infrastructure-controller"), + } + + syncCtx := factory.NewSyncContext("addon-token-infrastructure-controller") + + // Register custom event handlers on infrastructure informers + // Only handle Update and Delete, NOT Add (creation is triggered by addon watch) + eventHandler := newTokenInfraEventHandler(syncCtx, 
tokenInfraResourceToAddonKey) + + // Register the same event handler for all infrastructure informers + saInformer := serviceAccountInformer.Informer() + _, err := saInformer.AddEventHandler(eventHandler) + if err != nil { + utilruntime.HandleError(err) + } + + rInformer := roleInformer.Informer() + _, err = rInformer.AddEventHandler(eventHandler) + if err != nil { + utilruntime.HandleError(err) + } + + rbInformer := roleBindingInformer.Informer() + _, err = rbInformer.AddEventHandler(eventHandler) + if err != nil { + utilruntime.HandleError(err) + } + + return factory.New(). + WithSyncContext(syncCtx). + // Primary watch: ManagedClusterAddOns with token registration + WithFilteredEventsInformersQueueKeysFunc( + queue.QueueKeyByMetaNamespaceName, + addonFilter, + addonInformers.Informer(), + ). + // Bare informers with custom handlers (already registered above) + WithBareInformers( + saInformer, + rInformer, + rbInformer, + ). + WithSync(c.sync). + ToController("addon-token-infrastructure-controller") +} + +func (c *tokenInfrastructureController) sync(ctx context.Context, syncCtx factory.SyncContext, key string) error { + logger := klog.FromContext(ctx).WithValues("addon", key) + logger.V(4).Info("Reconciling addon token authentication") + + clusterName, addonName, err := cache.SplitMetaNamespaceKey(key) + if err != nil { + // ignore addon whose key is invalid + return nil + } + + addon, err := c.addonLister.ManagedClusterAddOns(clusterName).Get(addonName) + if errors.IsNotFound(err) { + // Addon is deleted, attempt to clean up any remaining token infrastructure + logger.Info("Addon not found, cleaning up any remaining token infrastructure") + return c.cleanupTokenInfrastructure(ctx, clusterName, addonName) + } + if err != nil { + return err + } + + // If addon is being deleted, clean up token infrastructure + if addon.DeletionTimestamp != nil { + logger.Info("Addon is being deleted, cleaning up token infrastructure") + // Check if TokenInfrastructureReady condition exists + infraReady := meta.FindStatusCondition(addon.Status.Conditions, TokenInfrastructureReadyCondition) + if infraReady != nil { + // Clean up token infrastructure resources + // No need to remove the condition - the addon is being deleted anyway + return c.cleanupTokenInfrastructure(ctx, clusterName, addonName) + } + return nil + } + + // Check if addon uses token authentication + if !usesTokenAuth(addon) { + // Check if TokenInfrastructureReady condition exists + infraReady := meta.FindStatusCondition(addon.Status.Conditions, TokenInfrastructureReadyCondition) + if infraReady == nil { + // No condition, nothing to clean up + logger.V(4).Info("No token-based kubeClient authentication found and no condition exists, skipping") + return nil + } + + // Clean up token infrastructure resources + logger.Info("Addon no longer uses token-based authentication, cleaning up token infrastructure") + if err := c.cleanupTokenInfrastructure(ctx, clusterName, addonName); err != nil { + return err + } + + // Remove TokenInfrastructureReady condition + return c.removeCondition(ctx, addon) + } + + // Ensure token infrastructure is created and ready + return c.ensureTokenInfrastructure(ctx, addon, clusterName, addonName) +} + +// ensureTokenInfrastructure creates and maintains token authentication infrastructure +func (c *tokenInfrastructureController) ensureTokenInfrastructure( + ctx context.Context, + addon *addonapiv1alpha1.ManagedClusterAddOn, + clusterName, addonName string) error { + + config := TokenInfraConfig{ + ClusterName: 
clusterName, + AddonName: addonName, + } + + // Apply manifests + resourceResults := c.applyManifests(ctx, config) + + var saUID string + var errs []error + for _, result := range resourceResults { + if result.Error != nil { + errs = append(errs, fmt.Errorf("%q (%T): %v", result.File, result.Type, result.Error)) + } + // Extract ServiceAccount UID + if sa, ok := result.Result.(*corev1.ServiceAccount); ok { + saUID = string(sa.UID) + } + } + + if len(errs) > 0 { + updateErr := c.updateCondition(ctx, addon, metav1.ConditionFalse, "TokenInfrastructureApplyFailed", + fmt.Sprintf("Failed to apply token infrastructure: %v", utilerrors.NewAggregate(errs))) + // Append updateErr to errs and return aggregate (NewAggregate filters out nil errors) + return utilerrors.NewAggregate(append(errs, updateErr)) + } + + // Set TokenInfrastructureReady condition with ServiceAccount UID + serviceAccountName := fmt.Sprintf("%s-agent", addonName) + message := fmt.Sprintf("ServiceAccount %s/%s (UID: %s) is ready", clusterName, serviceAccountName, saUID) + return c.updateCondition(ctx, addon, metav1.ConditionTrue, "TokenInfrastructureReady", message) +} + +// applyManifests applies token infrastructure manifests +func (c *tokenInfrastructureController) applyManifests(ctx context.Context, config TokenInfraConfig) []resourceapply.ApplyResult { + recorderWrapper := commonrecorder.NewEventsRecorderWrapper(ctx, c.recorder) + return resourceapply.ApplyDirectly( + ctx, + resourceapply.NewKubeClientHolder(c.kubeClient), + recorderWrapper, + c.cache, + func(name string) ([]byte, error) { + template, err := manifests.ClusterManagerManifestFiles.ReadFile(name) + if err != nil { + return nil, err + } + return assets.MustCreateAssetFromTemplate(name, template, config).Data, nil + }, + tokenInfraManifests..., + ) +} + +// updateCondition updates the TokenInfrastructureReady condition on the addon +func (c *tokenInfrastructureController) updateCondition(ctx context.Context, addon *addonapiv1alpha1.ManagedClusterAddOn, + status metav1.ConditionStatus, reason, message string) error { + addonPatcher := patcher.NewPatcher[ + *addonapiv1alpha1.ManagedClusterAddOn, + addonapiv1alpha1.ManagedClusterAddOnSpec, + addonapiv1alpha1.ManagedClusterAddOnStatus]( + c.addonClient.AddonV1alpha1().ManagedClusterAddOns(addon.Namespace)) + + addonCopy := addon.DeepCopy() + + condition := metav1.Condition{ + Type: TokenInfrastructureReadyCondition, + Status: status, + Reason: reason, + Message: message, + } + + meta.SetStatusCondition(&addonCopy.Status.Conditions, condition) + + _, err := addonPatcher.PatchStatus(ctx, addonCopy, addonCopy.Status, addon.Status) + return err +} + +// removeCondition removes the TokenInfrastructureReady condition from the addon +func (c *tokenInfrastructureController) removeCondition(ctx context.Context, addon *addonapiv1alpha1.ManagedClusterAddOn) error { + addonPatcher := patcher.NewPatcher[ + *addonapiv1alpha1.ManagedClusterAddOn, + addonapiv1alpha1.ManagedClusterAddOnSpec, + addonapiv1alpha1.ManagedClusterAddOnStatus]( + c.addonClient.AddonV1alpha1().ManagedClusterAddOns(addon.Namespace)) + + addonCopy := addon.DeepCopy() + + // Remove the TokenInfrastructureReady condition + meta.RemoveStatusCondition(&addonCopy.Status.Conditions, TokenInfrastructureReadyCondition) + + _, err := addonPatcher.PatchStatus(ctx, addonCopy, addonCopy.Status, addon.Status) + return err +} + +// cleanupTokenInfrastructure removes token authentication infrastructure resources +func (c *tokenInfrastructureController) 
cleanupTokenInfrastructure(ctx context.Context, clusterName, addonName string) error { + logger := klog.FromContext(ctx) + config := TokenInfraConfig{ + ClusterName: clusterName, + AddonName: addonName, + } + + recorderWrapper := commonrecorder.NewEventsRecorderWrapper(ctx, c.recorder) + resourceResults := resourceapply.DeleteAll( + ctx, + resourceapply.NewKubeClientHolder(c.kubeClient), + recorderWrapper, + func(name string) ([]byte, error) { + template, err := manifests.ClusterManagerManifestFiles.ReadFile(name) + if err != nil { + return nil, err + } + return assets.MustCreateAssetFromTemplate(name, template, config).Data, nil + }, + tokenInfraManifests..., + ) + + var errs []error + for _, result := range resourceResults { + if result.Error != nil { + errs = append(errs, fmt.Errorf("%q (%T): %v", result.File, result.Type, result.Error)) + } + } + + if len(errs) > 0 { + return utilerrors.NewAggregate(errs) + } + + logger.Info("Successfully cleaned up token infrastructure", "addon", addonName) + return nil +} diff --git a/pkg/addon/controllers/addontokeninfra/controller_test.go b/pkg/addon/controllers/addontokeninfra/controller_test.go new file mode 100644 index 000000000..c416cedff --- /dev/null +++ b/pkg/addon/controllers/addontokeninfra/controller_test.go @@ -0,0 +1,796 @@ +package addontokeninfra + +import ( + "context" + "encoding/json" + "testing" + "time" + + certificatesv1 "k8s.io/api/certificates/v1" + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/client-go/informers" + kubefake "k8s.io/client-go/kubernetes/fake" + clienttesting "k8s.io/client-go/testing" + "k8s.io/client-go/tools/cache" + + "open-cluster-management.io/addon-framework/pkg/addonmanager/addontesting" + addonapiv1alpha1 "open-cluster-management.io/api/addon/v1alpha1" + fakeaddon "open-cluster-management.io/api/client/addon/clientset/versioned/fake" + addoninformers "open-cluster-management.io/api/client/addon/informers/externalversions" + + testingcommon "open-cluster-management.io/ocm/pkg/common/testing" +) + +func newAddonWithTokenRegistration(name, cluster string) *addonapiv1alpha1.ManagedClusterAddOn { + addon := addontesting.NewAddon(name, cluster) + addon.Status.Registrations = []addonapiv1alpha1.RegistrationConfig{ + { + SignerName: certificatesv1.KubeAPIServerClientSignerName, + }, + } + addon.Status.KubeClientDriver = "token" + return addon +} + +func newAddonWithCSRRegistration(name, cluster string) *addonapiv1alpha1.ManagedClusterAddOn { + addon := addontesting.NewAddon(name, cluster) + addon.Status.Registrations = []addonapiv1alpha1.RegistrationConfig{ + { + SignerName: certificatesv1.KubeAPIServerClientSignerName, + }, + } + addon.Status.KubeClientDriver = "csr" + return addon +} + +func newAddonWithTokenInfraCondition(name, cluster string, status metav1.ConditionStatus) *addonapiv1alpha1.ManagedClusterAddOn { + addon := addontesting.NewAddon(name, cluster) + addon.Status.Registrations = []addonapiv1alpha1.RegistrationConfig{ + { + SignerName: certificatesv1.KubeAPIServerClientSignerName, + }, + } + addon.Status.KubeClientDriver = "token" + meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{ + Type: TokenInfrastructureReadyCondition, + Status: status, + Reason: "TokenInfrastructureReady", + Message: "ServiceAccount cluster1/test-agent (UID: test-uid) is ready", 
+ }) + return addon +} + +func TestAddonFilter(t *testing.T) { + cases := []struct { + name string + addon interface{} + expected bool + }{ + { + name: "not an addon object", + addon: &corev1.Pod{}, + expected: false, + }, + { + name: "addon with token driver", + addon: newAddonWithTokenRegistration("test", "cluster1"), + expected: true, + }, + { + name: "addon with CSR driver", + addon: newAddonWithCSRRegistration("test", "cluster1"), + expected: false, + }, + { + name: "addon with TokenInfrastructureReady condition", + addon: newAddonWithTokenInfraCondition("test", "cluster1", metav1.ConditionTrue), + expected: true, + }, + { + name: "addon without token driver or condition", + addon: addontesting.NewAddon("test", "cluster1"), + expected: false, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + result := addonFilter(c.addon) + if result != c.expected { + t.Errorf("expected %v, got %v", c.expected, result) + } + }) + } +} + +func TestTokenInfraResourceToAddonKey(t *testing.T) { + cases := []struct { + name string + obj runtime.Object + expected string + }{ + { + name: "serviceaccount with correct labels", + obj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + Labels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "true", + "addon.open-cluster-management.io/name": "test", + }, + }, + }, + expected: "cluster1/test", + }, + { + name: "role with correct labels", + obj: &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-token-role", + Namespace: "cluster1", + Labels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "true", + "addon.open-cluster-management.io/name": "test", + }, + }, + }, + expected: "cluster1/test", + }, + { + name: "rolebinding with correct labels", + obj: &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-token-role", + Namespace: "cluster1", + Labels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "true", + "addon.open-cluster-management.io/name": "test", + }, + }, + }, + expected: "cluster1/test", + }, + { + name: "resource without token-infrastructure label", + obj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + Labels: map[string]string{ + "addon.open-cluster-management.io/name": "test", + }, + }, + }, + expected: "", + }, + { + name: "resource with token-infrastructure=false", + obj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + Labels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "false", + "addon.open-cluster-management.io/name": "test", + }, + }, + }, + expected: "", + }, + { + name: "resource without addon name label", + obj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + Labels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "true", + }, + }, + }, + expected: "", + }, + { + name: "resource without namespace", + obj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Labels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "true", + "addon.open-cluster-management.io/name": "test", + }, + }, + }, + expected: "", + }, + { + name: "resource without any labels", + obj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + 
}, + }, + expected: "", + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + result := tokenInfraResourceToAddonKey(c.obj) + if result != c.expected { + t.Errorf("expected %q, got %q", c.expected, result) + } + }) + } +} + +func TestEventHandler(t *testing.T) { + cases := []struct { + name string + eventType string + obj interface{} + oldObj interface{} + expectQueue bool + expectError bool + }{ + { + name: "update with valid resource", + eventType: "update", + obj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + Labels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "true", + "addon.open-cluster-management.io/name": "test", + }, + }, + }, + oldObj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + }, + }, + expectQueue: true, + expectError: false, + }, + { + name: "delete with valid resource", + eventType: "delete", + obj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + Labels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "true", + "addon.open-cluster-management.io/name": "test", + }, + }, + }, + expectQueue: true, + expectError: false, + }, + { + name: "delete with tombstone", + eventType: "delete", + obj: cache.DeletedFinalStateUnknown{ + Key: "cluster1/test-agent", + Obj: &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + Labels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "true", + "addon.open-cluster-management.io/name": "test", + }, + }, + }, + }, + expectQueue: true, + expectError: false, + }, + { + name: "update with invalid type", + eventType: "update", + obj: "not-a-runtime-object", + oldObj: &corev1.ServiceAccount{}, + expectQueue: false, + expectError: true, + }, + { + name: "delete with invalid type", + eventType: "delete", + obj: "not-a-runtime-object", + expectQueue: false, + expectError: true, + }, + { + name: "delete with invalid tombstone", + eventType: "delete", + obj: cache.DeletedFinalStateUnknown{ + Key: "cluster1/test-agent", + Obj: "not-a-runtime-object", + }, + expectQueue: false, + expectError: true, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + syncCtx := testingcommon.NewFakeSyncContext(t, "test-key") + handler := newTokenInfraEventHandler(syncCtx, tokenInfraResourceToAddonKey) + + // Capture runtime errors + errorCaptured := false + utilruntime.ErrorHandlers = []utilruntime.ErrorHandler{ + func(ctx context.Context, err error, msg string, keysAndValues ...interface{}) { + errorCaptured = true + }, + } + + switch c.eventType { + case "update": + handler.OnUpdate(c.oldObj, c.obj) + case "delete": + handler.OnDelete(c.obj) + } + + if c.expectError && !errorCaptured { + t.Errorf("expected error to be captured but got none") + } + if !c.expectError && errorCaptured { + t.Errorf("unexpected error was captured") + } + + queueLen := syncCtx.Queue().Len() + if c.expectQueue && queueLen == 0 { + t.Errorf("expected item in queue but queue is empty") + } + if !c.expectQueue && queueLen > 0 { + t.Errorf("expected empty queue but got %d items", queueLen) + } + + // Drain the queue + for syncCtx.Queue().Len() > 0 { + item, _ := syncCtx.Queue().Get() + syncCtx.Queue().Done(item) + } + }) + } +} + +func TestReconcile(t *testing.T) { + cases := []struct { + name string + syncKey string + 
managedClusterAddon []runtime.Object + kubeObjects []runtime.Object + validateAddonActions func(t *testing.T, actions []clienttesting.Action) + validateKubeActions func(t *testing.T, actions []clienttesting.Action) + }{ + { + name: "no addon", + syncKey: "cluster1/test", + managedClusterAddon: []runtime.Object{}, + kubeObjects: []runtime.Object{}, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertNoActions(t, actions) + }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + // Should attempt cleanup even though addon doesn't exist + deleteCount := 0 + for _, action := range actions { + if action.GetVerb() == "delete" { + deleteCount++ + } + } + if deleteCount != 3 { + t.Errorf("expected 3 delete actions for cleanup, got %d", deleteCount) + } + }, + }, + { + name: "addon without token driver", + syncKey: "cluster1/test", + managedClusterAddon: []runtime.Object{ + newAddonWithCSRRegistration("test", "cluster1"), + }, + kubeObjects: []runtime.Object{}, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertNoActions(t, actions) + }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertNoActions(t, actions) + }, + }, + { + name: "create token infrastructure for addon with token driver", + syncKey: "cluster1/test", + managedClusterAddon: []runtime.Object{ + newAddonWithTokenRegistration("test", "cluster1"), + }, + kubeObjects: []runtime.Object{}, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertActions(t, actions, "patch") + patchAction := actions[0].(clienttesting.PatchActionImpl) + addon := &addonapiv1alpha1.ManagedClusterAddOn{} + err := json.Unmarshal(patchAction.Patch, addon) + if err != nil { + t.Fatal(err) + } + cond := meta.FindStatusCondition(addon.Status.Conditions, TokenInfrastructureReadyCondition) + if cond == nil { + t.Errorf("TokenInfrastructureReady condition not found") + return + } + if cond.Status != metav1.ConditionTrue { + t.Errorf("expected condition status True, got %s", cond.Status) + } + if cond.Reason != "TokenInfrastructureReady" { + t.Errorf("expected reason TokenInfrastructureReady, got %s", cond.Reason) + } + }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + // Should create ServiceAccount, Role, and RoleBinding + // resourceapply.ApplyDirectly also does gets, so we check for creates + createCount := 0 + for _, action := range actions { + if action.GetVerb() == "create" { + createCount++ + } + } + if createCount != 3 { + t.Errorf("expected 3 create actions, got %d", createCount) + } + }, + }, + { + name: "cleanup when addon switches from token to CSR", + syncKey: "cluster1/test", + managedClusterAddon: []runtime.Object{ + func() *addonapiv1alpha1.ManagedClusterAddOn { + addon := newAddonWithCSRRegistration("test", "cluster1") + meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{ + Type: TokenInfrastructureReadyCondition, + Status: metav1.ConditionTrue, + Reason: "TokenInfrastructureReady", + Message: "ServiceAccount cluster1/test-agent (UID: test-uid) is ready", + }) + return addon + }(), + }, + kubeObjects: []runtime.Object{ + &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + }, + }, + &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-token-role", + Namespace: "cluster1", + }, + }, + &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + 
Name: "test-token-role", + Namespace: "cluster1", + }, + }, + }, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertActions(t, actions, "patch") + patchAction := actions[0].(clienttesting.PatchActionImpl) + addon := &addonapiv1alpha1.ManagedClusterAddOn{} + err := json.Unmarshal(patchAction.Patch, addon) + if err != nil { + t.Fatal(err) + } + cond := meta.FindStatusCondition(addon.Status.Conditions, TokenInfrastructureReadyCondition) + if cond != nil { + t.Errorf("TokenInfrastructureReady condition should be removed") + } + }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + // Should delete RoleBinding, Role, and ServiceAccount + // resourceapply.DeleteAll also does gets, so we check for deletes + deleteCount := 0 + for _, action := range actions { + if action.GetVerb() == "delete" { + deleteCount++ + } + } + if deleteCount != 3 { + t.Errorf("expected 3 delete actions, got %d", deleteCount) + } + }, + }, + { + name: "cleanup when addon is being deleted", + syncKey: "cluster1/test", + managedClusterAddon: []runtime.Object{ + func() *addonapiv1alpha1.ManagedClusterAddOn { + addon := newAddonWithTokenInfraCondition("test", "cluster1", metav1.ConditionTrue) + now := metav1.Now() + addon.DeletionTimestamp = &now + return addon + }(), + }, + kubeObjects: []runtime.Object{ + &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + }, + }, + &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-token-role", + Namespace: "cluster1", + }, + }, + &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-token-role", + Namespace: "cluster1", + }, + }, + }, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + // No addon actions expected - the addon is being deleted, no need to update condition + testingcommon.AssertNoActions(t, actions) + }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + // Should delete RoleBinding, Role, and ServiceAccount + deleteCount := 0 + for _, action := range actions { + if action.GetVerb() == "delete" { + deleteCount++ + } + } + if deleteCount != 3 { + t.Errorf("expected 3 delete actions, got %d", deleteCount) + } + }, + }, + { + name: "update condition when infrastructure already exists", + syncKey: "cluster1/test", + managedClusterAddon: []runtime.Object{ + newAddonWithTokenRegistration("test", "cluster1"), + }, + kubeObjects: []runtime.Object{ + &corev1.ServiceAccount{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-agent", + Namespace: "cluster1", + UID: types.UID("test-uid"), + }, + }, + &rbacv1.Role{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-token-role", + Namespace: "cluster1", + }, + }, + &rbacv1.RoleBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-token-role", + Namespace: "cluster1", + }, + }, + }, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertActions(t, actions, "patch") + patchAction := actions[0].(clienttesting.PatchActionImpl) + addon := &addonapiv1alpha1.ManagedClusterAddOn{} + err := json.Unmarshal(patchAction.Patch, addon) + if err != nil { + t.Fatal(err) + } + cond := meta.FindStatusCondition(addon.Status.Conditions, TokenInfrastructureReadyCondition) + if cond == nil { + t.Errorf("TokenInfrastructureReady condition not found") + return + } + if cond.Status != metav1.ConditionTrue { + t.Errorf("expected condition status True, got %s", cond.Status) + } + }, + validateKubeActions: func(t *testing.T, 
actions []clienttesting.Action) { + // Should update existing resources + if len(actions) == 0 { + t.Errorf("expected some actions") + } + }, + }, + { + name: "addon being deleted without TokenInfrastructureReady condition", + syncKey: "cluster1/test", + managedClusterAddon: []runtime.Object{ + func() *addonapiv1alpha1.ManagedClusterAddOn { + addon := newAddonWithTokenRegistration("test", "cluster1") + now := metav1.Now() + addon.DeletionTimestamp = &now + return addon + }(), + }, + kubeObjects: []runtime.Object{}, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + // No condition to remove, should be no-op + testingcommon.AssertNoActions(t, actions) + }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + // No infrastructure to clean up + testingcommon.AssertNoActions(t, actions) + }, + }, + { + name: "addon with multiple registrations, only one token-based", + syncKey: "cluster1/test", + managedClusterAddon: []runtime.Object{ + func() *addonapiv1alpha1.ManagedClusterAddOn { + addon := addontesting.NewAddon("test", "cluster1") + addon.Status.Registrations = []addonapiv1alpha1.RegistrationConfig{ + { + SignerName: certificatesv1.KubeAPIServerClientSignerName, + }, + { + SignerName: "example.com/custom-signer", + }, + } + addon.Status.KubeClientDriver = "token" + return addon + }(), + }, + kubeObjects: []runtime.Object{}, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + // Should create infrastructure and set condition + testingcommon.AssertActions(t, actions, "patch") + patchAction := actions[0].(clienttesting.PatchActionImpl) + addon := &addonapiv1alpha1.ManagedClusterAddOn{} + err := json.Unmarshal(patchAction.Patch, addon) + if err != nil { + t.Fatal(err) + } + cond := meta.FindStatusCondition(addon.Status.Conditions, TokenInfrastructureReadyCondition) + if cond == nil { + t.Errorf("TokenInfrastructureReady condition not found") + return + } + if cond.Status != metav1.ConditionTrue { + t.Errorf("expected condition status True, got %s", cond.Status) + } + }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + createCount := 0 + for _, action := range actions { + if action.GetVerb() == "create" { + createCount++ + } + } + if createCount != 3 { + t.Errorf("expected 3 create actions, got %d", createCount) + } + }, + }, + { + name: "invalid sync key", + syncKey: "invalid-key-without-slash", + managedClusterAddon: []runtime.Object{ + newAddonWithTokenRegistration("test", "cluster1"), + }, + kubeObjects: []runtime.Object{}, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertNoActions(t, actions) + }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + // Should attempt cleanup even with malformed key (SplitMetaNamespaceKey doesn't error on this) + deleteCount := 0 + for _, action := range actions { + if action.GetVerb() == "delete" { + deleteCount++ + } + } + if deleteCount != 3 { + t.Errorf("expected 3 delete actions for cleanup, got %d", deleteCount) + } + }, + }, + { + name: "condition transition from False to True after recovery", + syncKey: "cluster1/test", + managedClusterAddon: []runtime.Object{ + func() *addonapiv1alpha1.ManagedClusterAddOn { + addon := newAddonWithTokenRegistration("test", "cluster1") + meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{ + Type: TokenInfrastructureReadyCondition, + Status: metav1.ConditionFalse, + Reason: "TokenInfrastructureApplyFailed", + Message: 
"Failed to apply token infrastructure", + }) + return addon + }(), + }, + kubeObjects: []runtime.Object{}, + validateAddonActions: func(t *testing.T, actions []clienttesting.Action) { + testingcommon.AssertActions(t, actions, "patch") + patchAction := actions[0].(clienttesting.PatchActionImpl) + addon := &addonapiv1alpha1.ManagedClusterAddOn{} + err := json.Unmarshal(patchAction.Patch, addon) + if err != nil { + t.Fatal(err) + } + cond := meta.FindStatusCondition(addon.Status.Conditions, TokenInfrastructureReadyCondition) + if cond == nil { + t.Errorf("TokenInfrastructureReady condition not found") + return + } + if cond.Status != metav1.ConditionTrue { + t.Errorf("expected condition status True after recovery, got %s", cond.Status) + } + if cond.Reason != "TokenInfrastructureReady" { + t.Errorf("expected reason TokenInfrastructureReady, got %s", cond.Reason) + } + }, + validateKubeActions: func(t *testing.T, actions []clienttesting.Action) { + createCount := 0 + for _, action := range actions { + if action.GetVerb() == "create" { + createCount++ + } + } + if createCount != 3 { + t.Errorf("expected 3 create actions, got %d", createCount) + } + }, + }, + } + + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + obj := append([]runtime.Object{}, c.managedClusterAddon...) + fakeAddonClient := fakeaddon.NewSimpleClientset(obj...) + fakeKubeClient := kubefake.NewSimpleClientset(c.kubeObjects...) + + addonInformers := addoninformers.NewSharedInformerFactory(fakeAddonClient, 10*time.Minute) + kubeInformers := informers.NewSharedInformerFactory(fakeKubeClient, 10*time.Minute) + + for _, obj := range c.managedClusterAddon { + if err := addonInformers.Addon().V1alpha1().ManagedClusterAddOns().Informer().GetStore().Add(obj); err != nil { + t.Fatal(err) + } + } + + syncContext := testingcommon.NewFakeSyncContext(t, c.syncKey) + + controller := NewTokenInfrastructureController( + fakeKubeClient, + fakeAddonClient, + addonInformers.Addon().V1alpha1().ManagedClusterAddOns(), + kubeInformers.Core().V1().ServiceAccounts(), + kubeInformers.Rbac().V1().Roles(), + kubeInformers.Rbac().V1().RoleBindings(), + ) + + err := controller.Sync(context.TODO(), syncContext, c.syncKey) + if err != nil { + t.Errorf("expected no error when sync: %v", err) + } + + c.validateAddonActions(t, fakeAddonClient.Actions()) + c.validateKubeActions(t, fakeKubeClient.Actions()) + }) + } +} diff --git a/pkg/addon/manager.go b/pkg/addon/manager.go index 851a531fa..1505ecddb 100644 --- a/pkg/addon/manager.go +++ b/pkg/addon/manager.go @@ -9,6 +9,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/dynamic" "k8s.io/client-go/dynamic/dynamicinformer" + "k8s.io/client-go/informers" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" @@ -28,6 +29,7 @@ import ( "open-cluster-management.io/ocm/pkg/addon/controllers/addonowner" "open-cluster-management.io/ocm/pkg/addon/controllers/addonprogressing" "open-cluster-management.io/ocm/pkg/addon/controllers/addontemplate" + "open-cluster-management.io/ocm/pkg/addon/controllers/addontokeninfra" "open-cluster-management.io/ocm/pkg/addon/controllers/cmainstallprogression" addonindex "open-cluster-management.io/ocm/pkg/addon/index" ) @@ -85,6 +87,18 @@ func RunManager(ctx context.Context, controllerContext *controllercmd.Controller dynamicInformers := dynamicinformer.NewDynamicSharedInformerFactory(dynamicClient, 10*time.Minute) + // Create filtered informers for token infrastructure resources + tokenInfraInformers := 
informers.NewSharedInformerFactoryWithOptions(hubKubeClient, 10*time.Minute, + informers.WithTweakListOptions(func(listOptions *metav1.ListOptions) { + selector := &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "addon.open-cluster-management.io/token-infrastructure": "true", + }, + } + listOptions.LabelSelector = metav1.FormatLabelSelector(selector) + }), + ) + return RunControllerManagerWithInformers( ctx, controllerContext, hubKubeClient, @@ -94,6 +108,7 @@ func RunManager(ctx context.Context, controllerContext *controllercmd.Controller addonInformerFactory, workInformers, dynamicInformers, + tokenInfraInformers, ) } @@ -107,6 +122,7 @@ func RunControllerManagerWithInformers( addonInformers addoninformers.SharedInformerFactory, workinformers workv1informers.SharedInformerFactory, dynamicInformers dynamicinformer.DynamicSharedInformerFactory, + tokenInfraInformers informers.SharedInformerFactory, ) error { // addonDeployController err := workinformers.Work().V1().ManifestWorks().Informer().AddIndexers( @@ -194,6 +210,15 @@ func RunControllerManagerWithInformers( workinformers, ) + tokenInfrastructureController := addontokeninfra.NewTokenInfrastructureController( + hubKubeClient, + hubAddOnClient, + addonInformers.Addon().V1alpha1().ManagedClusterAddOns(), + tokenInfraInformers.Core().V1().ServiceAccounts(), + tokenInfraInformers.Rbac().V1().Roles(), + tokenInfraInformers.Rbac().V1().RoleBindings(), + ) + go addonManagementController.Run(ctx, 2) go addonConfigurationController.Run(ctx, 2) go addonOwnerController.Run(ctx, 2) @@ -202,11 +227,13 @@ func RunControllerManagerWithInformers( // There should be only one instance of addonTemplateController running, since the addonTemplateController will // start a goroutine for each template-type addon it watches. 
go addonTemplateController.Run(ctx, 1) + go tokenInfrastructureController.Run(ctx, 1) clusterInformers.Start(ctx.Done()) addonInformers.Start(ctx.Done()) workinformers.Start(ctx.Done()) dynamicInformers.Start(ctx.Done()) + tokenInfraInformers.Start(ctx.Done()) <-ctx.Done() return nil diff --git a/test/integration/addon/token_infrastructure_test.go b/test/integration/addon/token_infrastructure_test.go new file mode 100644 index 000000000..56752a54b --- /dev/null +++ b/test/integration/addon/token_infrastructure_test.go @@ -0,0 +1,320 @@ +package integration + +import ( + "context" + "fmt" + + ginkgo "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + certificates "k8s.io/api/certificates/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/rand" + + addonapiv1alpha1 "open-cluster-management.io/api/addon/v1alpha1" + clusterv1 "open-cluster-management.io/api/cluster/v1" +) + +var _ = ginkgo.Describe("Token Infrastructure Controller", func() { + var managedClusterName, addOnName string + var err error + + ginkgo.BeforeEach(func() { + suffix := rand.String(5) + managedClusterName = fmt.Sprintf("cluster-%s", suffix) + addOnName = fmt.Sprintf("addon-%s", suffix) + + // Create managed cluster + managedCluster := &clusterv1.ManagedCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: managedClusterName, + }, + Spec: clusterv1.ManagedClusterSpec{ + HubAcceptsClient: true, + }, + } + _, err = hubClusterClient.ClusterV1().ManagedClusters().Create(context.Background(), managedCluster, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Create cluster namespace + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: managedClusterName, + }, + } + _, err = hubKubeClient.CoreV1().Namespaces().Create(context.Background(), ns, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + ginkgo.AfterEach(func() { + err = hubClusterClient.ClusterV1().ManagedClusters().Delete(context.Background(), managedClusterName, metav1.DeleteOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + err = hubKubeClient.CoreV1().Namespaces().Delete(context.Background(), managedClusterName, metav1.DeleteOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + }) + + assertTokenInfrastructureReady := func(clusterName, addonName string) { + ginkgo.By("Verify token infrastructure resources are created") + serviceAccountName := fmt.Sprintf("%s-agent", addonName) + roleName := fmt.Sprintf("%s-token-role", addonName) + roleBindingName := fmt.Sprintf("%s-token-role", addonName) + + // Check ServiceAccount exists with correct labels + gomega.Eventually(func() error { + sa, err := hubKubeClient.CoreV1().ServiceAccounts(clusterName).Get(context.Background(), serviceAccountName, metav1.GetOptions{}) + if err != nil { + return err + } + if sa.Labels["addon.open-cluster-management.io/token-infrastructure"] != "true" { + return fmt.Errorf("ServiceAccount missing token-infrastructure label") + } + if sa.Labels["addon.open-cluster-management.io/name"] != addonName { + return fmt.Errorf("ServiceAccount missing addon name label") + } + return nil + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + // Check Role exists with correct labels + gomega.Eventually(func() error { + role, err := hubKubeClient.RbacV1().Roles(clusterName).Get(context.Background(), roleName, metav1.GetOptions{}) + if err != nil { + return 
err + } + if role.Labels["addon.open-cluster-management.io/token-infrastructure"] != "true" { + return fmt.Errorf("Role missing token-infrastructure label") + } + if role.Labels["addon.open-cluster-management.io/name"] != addonName { + return fmt.Errorf("Role missing addon name label") + } + return nil + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + // Check RoleBinding exists with correct labels + gomega.Eventually(func() error { + rb, err := hubKubeClient.RbacV1().RoleBindings(clusterName).Get(context.Background(), roleBindingName, metav1.GetOptions{}) + if err != nil { + return err + } + if rb.Labels["addon.open-cluster-management.io/token-infrastructure"] != "true" { + return fmt.Errorf("RoleBinding missing token-infrastructure label") + } + if rb.Labels["addon.open-cluster-management.io/name"] != addonName { + return fmt.Errorf("RoleBinding missing addon name label") + } + return nil + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + ginkgo.By("Verify TokenInfrastructureReady condition is set to True") + gomega.Eventually(func() error { + addon, err := hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(clusterName).Get(context.Background(), addonName, metav1.GetOptions{}) + if err != nil { + return err + } + + cond := meta.FindStatusCondition(addon.Status.Conditions, "TokenInfrastructureReady") + if cond == nil { + return fmt.Errorf("TokenInfrastructureReady condition not found") + } + if cond.Status != metav1.ConditionTrue { + return fmt.Errorf("TokenInfrastructureReady condition is not True: %s - %s", cond.Reason, cond.Message) + } + return nil + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + } + + assertTokenInfrastructureCleanedUp := func(clusterName, addonName string) { + ginkgo.By("Verify token infrastructure resources are deleted") + serviceAccountName := fmt.Sprintf("%s-agent", addonName) + roleName := fmt.Sprintf("%s-token-role", addonName) + roleBindingName := fmt.Sprintf("%s-token-role", addonName) + + // Check ServiceAccount is deleted + gomega.Eventually(func() bool { + _, err := hubKubeClient.CoreV1().ServiceAccounts(clusterName).Get(context.Background(), serviceAccountName, metav1.GetOptions{}) + return errors.IsNotFound(err) + }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) + + // Check Role is deleted + gomega.Eventually(func() bool { + _, err := hubKubeClient.RbacV1().Roles(clusterName).Get(context.Background(), roleName, metav1.GetOptions{}) + return errors.IsNotFound(err) + }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) + + // Check RoleBinding is deleted + gomega.Eventually(func() bool { + _, err := hubKubeClient.RbacV1().RoleBindings(clusterName).Get(context.Background(), roleBindingName, metav1.GetOptions{}) + return errors.IsNotFound(err) + }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) + } + + ginkgo.It("should create token infrastructure when addon uses token driver", func() { + ginkgo.By("Create ManagedClusterAddOn") + addOn := &addonapiv1alpha1.ManagedClusterAddOn{ + ObjectMeta: metav1.ObjectMeta{ + Name: addOnName, + Namespace: managedClusterName, + }, + Spec: addonapiv1alpha1.ManagedClusterAddOnSpec{ + InstallNamespace: addOnName, + }, + } + _, err = hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Create(context.Background(), addOn, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + ginkgo.By("Update addon status with kubeClient registration and token driver") + 
gomega.Eventually(func() error { + addon, err := hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Get(context.Background(), addOnName, metav1.GetOptions{}) + if err != nil { + return err + } + + addon.Status.Registrations = []addonapiv1alpha1.RegistrationConfig{ + { + SignerName: certificates.KubeAPIServerClientSignerName, + }, + } + addon.Status.KubeClientDriver = "token" + _, err = hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).UpdateStatus(context.Background(), addon, metav1.UpdateOptions{}) + return err + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + assertTokenInfrastructureReady(managedClusterName, addOnName) + }) + + ginkgo.It("should cleanup token infrastructure when addon switches from token to CSR driver", func() { + ginkgo.By("Create ManagedClusterAddOn with token driver") + addOn := &addonapiv1alpha1.ManagedClusterAddOn{ + ObjectMeta: metav1.ObjectMeta{ + Name: addOnName, + Namespace: managedClusterName, + }, + Spec: addonapiv1alpha1.ManagedClusterAddOnSpec{ + InstallNamespace: addOnName, + }, + } + _, err = hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Create(context.Background(), addOn, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Eventually(func() error { + addon, err := hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Get(context.Background(), addOnName, metav1.GetOptions{}) + if err != nil { + return err + } + + addon.Status.Registrations = []addonapiv1alpha1.RegistrationConfig{ + { + SignerName: certificates.KubeAPIServerClientSignerName, + }, + } + addon.Status.KubeClientDriver = "token" + _, err = hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).UpdateStatus(context.Background(), addon, metav1.UpdateOptions{}) + return err + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + assertTokenInfrastructureReady(managedClusterName, addOnName) + + ginkgo.By("Switch addon to CSR driver") + gomega.Eventually(func() error { + addon, err := hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Get(context.Background(), addOnName, metav1.GetOptions{}) + if err != nil { + return err + } + + // Update kubeClientDriver to switch to CSR driver + addon.Status.KubeClientDriver = "csr" + _, err = hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).UpdateStatus(context.Background(), addon, metav1.UpdateOptions{}) + return err + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + assertTokenInfrastructureCleanedUp(managedClusterName, addOnName) + + ginkgo.By("Verify TokenInfrastructureReady condition is removed") + gomega.Eventually(func() bool { + addon, err := hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Get(context.Background(), addOnName, metav1.GetOptions{}) + if err != nil { + return false + } + + cond := meta.FindStatusCondition(addon.Status.Conditions, "TokenInfrastructureReady") + return cond == nil + }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) + }) + + ginkgo.It("should cleanup token infrastructure when addon is deleted", func() { + ginkgo.By("Create ManagedClusterAddOn with token driver") + addOn := &addonapiv1alpha1.ManagedClusterAddOn{ + ObjectMeta: metav1.ObjectMeta{ + Name: addOnName, + Namespace: managedClusterName, + }, + Spec: addonapiv1alpha1.ManagedClusterAddOnSpec{ + InstallNamespace: addOnName, + }, + } + _, err = 
hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Create(context.Background(), addOn, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Eventually(func() error { + addon, err := hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Get(context.Background(), addOnName, metav1.GetOptions{}) + if err != nil { + return err + } + + addon.Status.Registrations = []addonapiv1alpha1.RegistrationConfig{ + { + SignerName: certificates.KubeAPIServerClientSignerName, + }, + } + addon.Status.KubeClientDriver = "token" + _, err = hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).UpdateStatus(context.Background(), addon, metav1.UpdateOptions{}) + return err + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + assertTokenInfrastructureReady(managedClusterName, addOnName) + + ginkgo.By("Delete the addon") + err = hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Delete(context.Background(), addOnName, metav1.DeleteOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + assertTokenInfrastructureCleanedUp(managedClusterName, addOnName) + }) + + ginkgo.It("should handle addon with multiple registrations where only one is token-based", func() { + ginkgo.By("Create ManagedClusterAddOn with multiple registrations including token driver") + addOn := &addonapiv1alpha1.ManagedClusterAddOn{ + ObjectMeta: metav1.ObjectMeta{ + Name: addOnName, + Namespace: managedClusterName, + }, + Spec: addonapiv1alpha1.ManagedClusterAddOnSpec{ + InstallNamespace: addOnName, + }, + } + _, err = hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Create(context.Background(), addOn, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + gomega.Eventually(func() error { + addon, err := hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).Get(context.Background(), addOnName, metav1.GetOptions{}) + if err != nil { + return err + } + + addon.Status.Registrations = []addonapiv1alpha1.RegistrationConfig{ + { + SignerName: certificates.KubeAPIServerClientSignerName, + }, + { + SignerName: "example.com/custom-signer", + }, + } + addon.Status.KubeClientDriver = "token" + _, err = hubAddonClient.AddonV1alpha1().ManagedClusterAddOns(managedClusterName).UpdateStatus(context.Background(), addon, metav1.UpdateOptions{}) + return err + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + assertTokenInfrastructureReady(managedClusterName, addOnName) + }) +})
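---

Usage sketches (illustrative notes appended after the patch, not part of
the diff; the addon name "hello" and cluster namespace "cluster1" below
are hypothetical).

Requesting a token for the per-addon ServiceAccount: the ServiceAccount
created by this controller is meant to be consumed through the
TokenRequest API, and the new Role scopes "create" on
serviceaccounts/token to exactly "<addon>-agent" via resourceNames. A
minimal sketch, assuming a hub kubeconfig:

package main

import (
	"context"
	"fmt"

	authenticationv1 "k8s.io/api/authentication/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

func main() {
	// Build a client from the default kubeconfig; how the caller
	// authenticates to the hub is outside the scope of this sketch.
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client := kubernetes.NewForConfigOrDie(config)

	// Request a short-lived token for the hypothetical addon "hello";
	// the controller names the ServiceAccount "<addon>-agent" and places
	// it in the cluster namespace.
	expiration := int64(3600)
	tr, err := client.CoreV1().ServiceAccounts("cluster1").CreateToken(
		context.TODO(),
		"hello-agent",
		&authenticationv1.TokenRequest{
			Spec: authenticationv1.TokenRequestSpec{
				ExpirationSeconds: &expiration,
			},
		},
		metav1.CreateOptions{},
	)
	if err != nil {
		panic(err)
	}
	// tr.Status.Token carries the bearer token itself.
	fmt.Println("token expires at:", tr.Status.ExpirationTimestamp)
}

The grant can also be verified from the CLI, e.g.
"kubectl auth can-i create serviceaccounts/token -n cluster1 --as=<subject>".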
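Checking the custom "subscribe" verb: the API server never evaluates
"subscribe" on any REST path itself, so grants like the ones added here
are typically consumed by a serving component (the hub gRPC server in
this case) issuing a SubjectAccessReview for the caller's identity. A
generic sketch of that check, not taken from the gRPC server code in
this repo:

package sketch

import (
	"context"

	authorizationv1 "k8s.io/api/authorization/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
)

// allowedToSubscribe asks the hub API server whether the given identity
// holds the "subscribe" grant on serviceaccounts/token in the cluster
// namespace, i.e. whether the Role/RoleBinding pair from this patch (or
// an equivalent grant) applies to it.
func allowedToSubscribe(ctx context.Context, client kubernetes.Interface,
	user string, groups []string, clusterName string) (bool, error) {
	sar := &authorizationv1.SubjectAccessReview{
		Spec: authorizationv1.SubjectAccessReviewSpec{
			User:   user,   // e.g. the identity extracted from the presented token
			Groups: groups, // e.g. "system:open-cluster-management:<cluster>"
			ResourceAttributes: &authorizationv1.ResourceAttributes{
				Namespace:   clusterName,
				Verb:        "subscribe",
				Resource:    "serviceaccounts",
				Subresource: "token",
			},
		},
	}
	resp, err := client.AuthorizationV1().SubjectAccessReviews().Create(ctx, sar, metav1.CreateOptions{})
	if err != nil {
		return false, err
	}
	return resp.Status.Allowed, nil
}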
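Manifest rendering: the three manifests are plain Go templates
parameterized by TokenInfraConfig. applyManifests and
cleanupTokenInfrastructure delegate the rendering to
assets.MustCreateAssetFromTemplate; the standalone sketch below shows
the same substitution with text/template, inlining
token-serviceaccount.yaml instead of reading it from the embedded
manifests FS:

package main

import (
	"os"
	"text/template"
)

// Mirrors TokenInfraConfig in controller.go.
type TokenInfraConfig struct {
	ClusterName string
	AddonName   string
}

// Inlined copy of token-serviceaccount.yaml from this patch.
const tokenServiceAccount = `apiVersion: v1
kind: ServiceAccount
metadata:
  name: "{{ .AddonName }}-agent"
  namespace: "{{ .ClusterName }}"
  labels:
    addon.open-cluster-management.io/name: "{{ .AddonName }}"
    addon.open-cluster-management.io/token-infrastructure: "true"
`

func main() {
	// Placeholder values; in the controller these come from the addon key.
	cfg := TokenInfraConfig{ClusterName: "cluster1", AddonName: "hello"}
	tmpl := template.Must(template.New("sa").Parse(tokenServiceAccount))
	// Prints the ServiceAccount "hello-agent" in namespace "cluster1",
	// carrying the two labels the event handler keys on.
	if err := tmpl.Execute(os.Stdout, cfg); err != nil {
		panic(err)
	}
}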
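Gating on readiness: consumers can wait for the TokenInfrastructureReady
condition the controller maintains (its True message also carries the
ServiceAccount UID) before requesting tokens. A sketch, with client and
names illustrative:

package sketch

import (
	"context"

	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	addonv1alpha1client "open-cluster-management.io/api/client/addon/clientset/versioned"
)

// tokenInfraReady reports whether the ServiceAccount/Role/RoleBinding
// triple for the hypothetical addon "hello" on "cluster1" is in place.
func tokenInfraReady(ctx context.Context, client addonv1alpha1client.Interface) (bool, error) {
	addon, err := client.AddonV1alpha1().ManagedClusterAddOns("cluster1").Get(ctx, "hello", metav1.GetOptions{})
	if err != nil {
		return false, err
	}
	return meta.IsStatusConditionTrue(addon.Status.Conditions, "TokenInfrastructureReady"), nil
}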