diff --git a/pkg/operator/operators/clustermanager/controllers/clustermanagercontroller/clustermanager_controller.go b/pkg/operator/operators/clustermanager/controllers/clustermanagercontroller/clustermanager_controller.go index e18f521a5..f154b5c90 100644 --- a/pkg/operator/operators/clustermanager/controllers/clustermanagercontroller/clustermanager_controller.go +++ b/pkg/operator/operators/clustermanager/controllers/clustermanagercontroller/clustermanager_controller.go @@ -4,12 +4,14 @@ import ( "context" "encoding/base64" errorhelpers "errors" + "fmt" "os" "strings" "time" "github.com/openshift/library-go/pkg/assets" "github.com/openshift/library-go/pkg/operator/resource/resourceapply" + corev1 "k8s.io/api/core/v1" apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" @@ -281,18 +283,32 @@ func (n *clusterManagerController) sync(ctx context.Context, controllerContext f return n.patcher.RemoveFinalizer(ctx, clusterManager, clusterManagerFinalizer) } - // get caBundle - caBundle := "placeholder" - configmap, err := n.configMapLister.ConfigMaps(clusterManagerNamespace).Get(helpers.CaBundleConfigmap) - switch { - case errors.IsNotFound(err): - // do nothing - case err != nil: + // Create the hub namespace FIRST, before waiting for the CA bundle. + // This is required because the cert rotation controller needs the namespace to exist + // before it can create the ca-bundle-configmap. If we wait for the CA bundle before + // creating the namespace, we would have a deadlock. + if err := ensureNamespace(ctx, hubClient, clusterManagerNamespace); err != nil { return err - default: - if cb := configmap.Data["ca-bundle.crt"]; len(cb) > 0 { - caBundle = cb + } + + // get caBundle from the ConfigMap created by the cert rotation controller. + // The cert rotation controller must create this ConfigMap before we can proceed, + // otherwise the CRDs will be created with an invalid "placeholder" CA bundle, + // causing webhook conversion to fail and the CRDs to not become Established. + configmap, err := n.configMapLister.ConfigMaps(clusterManagerNamespace).Get(helpers.CaBundleConfigmap) + if err != nil { + if errors.IsNotFound(err) { + logger.V(4).Info("CA bundle configmap not yet available, will retry", + "namespace", clusterManagerNamespace, "configmap", helpers.CaBundleConfigmap) + controllerContext.Queue().AddAfter(clusterManagerName, 3*time.Second) + return nil } + return err + } + caBundle := configmap.Data["ca-bundle.crt"] + if len(caBundle) == 0 { + return fmt.Errorf("CA bundle configmap %s/%s exists but ca-bundle.crt is empty", + clusterManagerNamespace, helpers.CaBundleConfigmap) } encodedCaBundle := base64.StdEncoding.EncodeToString([]byte(caBundle)) config.RegistrationAPIServiceCABundle = encodedCaBundle @@ -385,6 +401,33 @@ func ensureSAKubeconfigs(ctx context.Context, clusterManagerName, clusterManager return nil } +// ensureNamespace creates the namespace if it doesn't exist. +// This is used to create the hub namespace before waiting for the CA bundle, +// so that the cert rotation controller can create the ca-bundle-configmap. +func ensureNamespace(ctx context.Context, kubeClient kubernetes.Interface, namespace string) error { + _, err := kubeClient.CoreV1().Namespaces().Get(ctx, namespace, metav1.GetOptions{}) + if err == nil { + // Namespace already exists + return nil + } + if !errors.IsNotFound(err) { + return err + } + // Create the namespace + ns := &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: namespace, + }, + } + _, err = kubeClient.CoreV1().Namespaces().Create(ctx, ns, metav1.CreateOptions{}) + if err != nil && !errors.IsAlreadyExists(err) { + return err + } + logger := klog.FromContext(ctx) + logger.V(4).Info("Created namespace for cert rotation controller", "namespace", namespace) + return nil +} + // TODO: support IPV6 address func isIPFormat(address string) bool { runes := []rune(address) diff --git a/pkg/operator/operators/clustermanager/controllers/clustermanagercontroller/clustermanager_controller_test.go b/pkg/operator/operators/clustermanager/controllers/clustermanagercontroller/clustermanager_controller_test.go index 7fd1ca935..f3e6b38e5 100644 --- a/pkg/operator/operators/clustermanager/controllers/clustermanagercontroller/clustermanager_controller_test.go +++ b/pkg/operator/operators/clustermanager/controllers/clustermanagercontroller/clustermanager_controller_test.go @@ -81,7 +81,23 @@ func newClusterManager(name string) *operatorapiv1.ClusterManager { } } -func newTestController(t *testing.T, clustermanager *operatorapiv1.ClusterManager) *testController { +// newCaBundleConfigMap creates the CA bundle ConfigMap that the cert rotation controller creates. +// This is required for the cluster manager controller to proceed with deploying CRDs. +func newCaBundleConfigMap(namespace string) *corev1.ConfigMap { + return &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: helpers.CaBundleConfigmap, + Namespace: namespace, + }, + Data: map[string]string{ + "ca-bundle.crt": "-----BEGIN CERTIFICATE-----\ntest-ca-bundle\n-----END CERTIFICATE-----", + }, + } +} + +// newTestControllerWithoutCaBundle creates a test controller without the CA bundle ConfigMap +// to test the behavior when the cert rotation controller hasn't created it yet. +func newTestControllerWithoutCaBundle(t *testing.T, clustermanager *operatorapiv1.ClusterManager) *testController { kubeClient := fakekube.NewSimpleClientset() kubeInfomers := kubeinformers.NewSharedInformerFactory(kubeClient, 5*time.Minute) fakeOperatorClient := fakeoperatorlient.NewSimpleClientset(clustermanager) @@ -107,6 +123,44 @@ func newTestController(t *testing.T, clustermanager *operatorapiv1.ClusterManage } } +// newTestController creates a test controller with all required dependencies including +// the CA bundle ConfigMap. This simulates the normal state where the cert rotation +// controller has already created the CA bundle ConfigMap. +func newTestController(t *testing.T, clustermanager *operatorapiv1.ClusterManager) *testController { + clusterManagerNamespace := helpers.ClusterManagerNamespace(clustermanager.Name, clustermanager.Spec.DeployOption.Mode) + caBundleConfigMap := newCaBundleConfigMap(clusterManagerNamespace) + + kubeClient := fakekube.NewSimpleClientset(caBundleConfigMap) + kubeInfomers := kubeinformers.NewSharedInformerFactory(kubeClient, 5*time.Minute) + fakeOperatorClient := fakeoperatorlient.NewSimpleClientset(clustermanager) + operatorInformers := operatorinformers.NewSharedInformerFactory(fakeOperatorClient, 5*time.Minute) + + clusterManagerController := &clusterManagerController{ + patcher: patcher.NewPatcher[ + *operatorapiv1.ClusterManager, operatorapiv1.ClusterManagerSpec, operatorapiv1.ClusterManagerStatus]( + fakeOperatorClient.OperatorV1().ClusterManagers()), + clusterManagerLister: operatorInformers.Operator().V1().ClusterManagers().Lister(), + configMapLister: kubeInfomers.Core().V1().ConfigMaps().Lister(), + cache: resourceapply.NewResourceCache(), + } + + store := operatorInformers.Operator().V1().ClusterManagers().Informer().GetStore() + if err := store.Add(clustermanager); err != nil { + t.Fatal(err) + } + + // Add the CA bundle ConfigMap to the informer store + configMapStore := kubeInfomers.Core().V1().ConfigMaps().Informer().GetStore() + if err := configMapStore.Add(caBundleConfigMap); err != nil { + t.Fatal(err) + } + + return &testController{ + clusterManagerController: clusterManagerController, + operatorClient: fakeOperatorClient, + } +} + func setDeployment(clusterManagerName, clusterManagerNamespace string) []runtime.Object { var replicas = int32(1) @@ -629,6 +683,30 @@ func TestSyncDeployNoWebhook(t *testing.T) { testingcommon.AssertEqualNumber(t, len(createCRDObjects), 12) } +// TestSyncDeployWithoutCaBundle tests that sync requeues when the CA bundle ConfigMap +// doesn't exist. This ensures the controller waits for the cert rotation controller to create +// the ConfigMap before proceeding, preventing CRDs from being created with invalid "placeholder" +// CA bundles that would cause webhook conversion to fail. +func TestSyncDeployWithoutCaBundle(t *testing.T) { + clusterManager := newClusterManager("testhub") + tc := newTestControllerWithoutCaBundle(t, clusterManager) + clusterManagerNamespace := helpers.ClusterManagerNamespace(clusterManager.Name, clusterManager.Spec.DeployOption.Mode) + cd := setDeployment(clusterManager.Name, clusterManagerNamespace) + setup(t, tc, cd) + + syncContext := testingcommon.NewFakeSyncContext(t, "testhub") + + err := tc.clusterManagerController.sync(ctx, syncContext, "testhub") + // sync should return nil (requeue via AddAfter) instead of an error + if err != nil { + t.Fatalf("Expected no error when CA bundle ConfigMap is missing (should requeue via AddAfter), but got: %v", err) + } + + // Note: We can't easily verify the requeue here because AddAfter adds items with a delay. + // The important thing is that sync returns nil (indicating graceful requeue) instead of + // proceeding with an invalid CA bundle or returning an error. +} + // TestSyncDelete test cleanup hub deploy func TestSyncDelete(t *testing.T) { clusterManager := newClusterManager("testhub")