From 8db2ecf7e18ca4f89e9626bbff992046b07ba356 Mon Sep 17 00:00:00 2001 From: liuwei Date: Mon, 31 May 2021 09:03:48 +0000 Subject: [PATCH] support to check hub seceret expired Signed-off-by: liuwei --- .../bootstrapcontroller.go | 124 ++++++++++++++---- .../bootstrapcontroller_test.go | 84 +++++++++++- test/integration/integration_suite_test.go | 5 + test/integration/klusterlet_test.go | 69 ++++++++++ test/integration/util/util.go | 54 ++++++++ 5 files changed, 305 insertions(+), 31 deletions(-) diff --git a/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go b/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go index 60e8b884c..a8c270401 100644 --- a/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go +++ b/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go @@ -4,7 +4,7 @@ import ( "bytes" "context" "fmt" - "strings" + "time" operatorinformer "github.com/open-cluster-management/api/client/operator/informers/externalversions/operator/v1" operatorlister "github.com/open-cluster-management/api/client/operator/listers/operator/v1" @@ -22,14 +22,22 @@ import ( coreinformer "k8s.io/client-go/informers/core/v1" "k8s.io/client-go/kubernetes" corelister "k8s.io/client-go/listers/core/v1" + "k8s.io/client-go/tools/cache" "k8s.io/client-go/tools/clientcmd" clientcmdapi "k8s.io/client-go/tools/clientcmd/api" + certutil "k8s.io/client-go/util/cert" "k8s.io/klog/v2" ) -// bootstrapController watches bootstrap hub kubeconfig secret, if the secret is changed with hub kube-apiserver ca or apiserver -// endpoints, this controller will make the klusterlet re-bootstrap to get the new hub kubeconfig from hub cluster by deleting -// the current hub kubeconfig secret and restart the klusterlet agents +const tlsCertFile = "tls.crt" + +// BootstrapControllerSyncInterval is exposed so that integration tests can crank up the constroller sync speed. +var BootstrapControllerSyncInterval = 5 * time.Minute + +// bootstrapController watches bootstrap-hub-kubeconfig and hub-kubeconfig-secret secrets, if the bootstrap-hub-kubeconfig secret +// is changed with hub kube-apiserver ca or apiserver endpoints, or the hub-kubeconfig-secret secret is expired, this controller +// will make the klusterlet re-bootstrap to get the new hub kubeconfig from hub cluster by deleting the current hub kubeconfig +// secret and restart the klusterlet agents type bootstrapController struct { kubeClient kubernetes.Interface klusterletLister operatorlister.KlusterletLister @@ -49,6 +57,7 @@ func NewBootstrapController( } return factory.New().WithSync(controller.sync). WithInformersQueueKeyFunc(bootstrapSecretQueueKeyFunc(controller.klusterletLister), secretInformer.Informer()). + ResyncEvery(BootstrapControllerSyncInterval). ToController("BootstrapController", recorder) } @@ -58,15 +67,33 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor return nil } - keys := strings.Split(queueKey, "/") - if len(keys) != 2 { - // this should not happen, do nothing + klog.V(4).Infof("Reconciling klusterlet kubeconfig secrets %q", queueKey) + + klusterletNamespace, klusterletName, err := cache.SplitMetaNamespaceKey(queueKey) + if err != nil { + // ignore bad format key return nil } - klusterletNamespace := keys[0] - klusterletName := keys[1] - klog.V(4).Infof("Reconciling bootstrap hub kubeconfig secret %q", klusterletNamespace+"/"+helpers.BootstrapHubKubeConfig) + // triggered by resync, checking whether the hub kubeconfig secret is expired + if klusterletNamespace == "" && klusterletName == factory.DefaultQueueKey { + klusterlets, err := k.klusterletLister.List(labels.Everything()) + if err != nil { + return err + } + + for _, klusterlet := range klusterlets { + namespace := klusterlet.Spec.Namespace + if namespace == "" { + namespace = helpers.KlusterletDefaultNamespace + } + + // enqueue the klusterlet to reconcile + controllerContext.Queue().Add(fmt.Sprintf("%s/%s", namespace, klusterlet.Name)) + } + + return nil + } bootstrapHubKubeconfigSecret, err := k.secretLister.Secrets(klusterletNamespace).Get(helpers.BootstrapHubKubeConfig) switch { @@ -81,14 +108,18 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor if err != nil { // a bad bootstrap secret, ignore it controllerContext.Recorder().Warningf("BadBootstrapSecret", - fmt.Sprintf("unable to load hub kubeconfig from secret %q: %v", klusterletNamespace+"/"+helpers.BootstrapHubKubeConfig, err)) + fmt.Sprintf("unable to load hub kubeconfig from secret %s/%s: %v", klusterletNamespace, helpers.BootstrapHubKubeConfig, err)) return nil } hubKubeconfigSecret, err := k.secretLister.Secrets(klusterletNamespace).Get(helpers.HubKubeConfig) switch { case errors.IsNotFound(err): - // the hub kubeconfig secret not found, could not have bootstrap yet, do nothing + // the hub kubeconfig secret not found, could not have bootstrap yet, do nothing currently + // TODO one case should be supported in the future: the bootstrap phase may be failed due to + // the content of bootstrap secret is wrong, this also results in the hub kubeconfig secret + // cannot be found. In this case, user may need to correct the bootstrap secret. we need to + // find a way to know the bootstrap secret is corrected, and then reload the klusterlet return nil case err != nil: return err @@ -98,46 +129,58 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor if err != nil { // the hub kubeconfig secret has errors, do nothing controllerContext.Recorder().Warningf("BadHubKubeConfigSecret", - fmt.Sprintf("unable to load hub kubeconfig from secret %q: %v", klusterletNamespace+"/"+helpers.BootstrapHubKubeConfig, err)) + fmt.Sprintf("unable to load hub kubeconfig from secret %s/%s: %v", klusterletNamespace, helpers.BootstrapHubKubeConfig, err)) return nil } - // the CA and server are not changed in bootstrap kubeconfig secret, ignore this change - if bootstrapKubeconfig.Server == hubKubeconfig.Server && - bytes.Equal(bootstrapKubeconfig.CertificateAuthorityData, hubKubeconfig.CertificateAuthorityData) { + if bootstrapKubeconfig.Server != hubKubeconfig.Server || + !bytes.Equal(bootstrapKubeconfig.CertificateAuthorityData, hubKubeconfig.CertificateAuthorityData) { + // the bootstrap kubeconfig secret is changed, reload the klusterlet agents + reloadReason := fmt.Sprintf("the bootstrap secret %s/%s is changed", klusterletNamespace, helpers.BootstrapHubKubeConfig) + return k.reloadAgents(ctx, controllerContext, klusterletNamespace, klusterletName, reloadReason) + } + + expired, err := isHubKubeconfigSecretExpired(hubKubeconfigSecret) + if err != nil { + // the hub kubeconfig secret has errors, do nothing + controllerContext.Recorder().Warningf("BadHubKubeConfigSecret", + fmt.Sprintf("the hub kubeconfig secret %s/%s is invalid: %v", klusterletNamespace, helpers.HubKubeConfig, err)) return nil } - // the bootstrap kubeconfig secret is changed, reload the klusterlet agents - return k.reloadAgents(ctx, controllerContext, klusterletNamespace, klusterletName) + // the hub kubeconfig secret cert is not expired, do nothing + if !expired { + return nil + } + + // the hub kubeconfig secret cert is expired, reload klusterlet to restart bootstrap + reloadReason := fmt.Sprintf("the hub kubeconfig secret %s/%s is expired", klusterletNamespace, helpers.HubKubeConfig) + return k.reloadAgents(ctx, controllerContext, klusterletNamespace, klusterletName, reloadReason) } // reloadAgents reload klusterlet agents by // 1. make the registration agent re-bootstrap by deleting the current hub kubeconfig secret to // 2. restart the registration and work agents to reload the new hub ca by deleting the agent deployments -func (k *bootstrapController) reloadAgents(ctx context.Context, ctrlContext factory.SyncContext, namespace, klusterletName string) error { +func (k *bootstrapController) reloadAgents(ctx context.Context, ctrlContext factory.SyncContext, namespace, klusterletName, reason string) error { if err := k.kubeClient.CoreV1().Secrets(namespace).Delete(ctx, helpers.HubKubeConfig, metav1.DeleteOptions{}); err != nil { return err } - ctrlContext.Recorder().Eventf("HubKubeconfigSecretDeleted", - fmt.Sprintf("the hub kubeconfig secret %q is deleted due to the bootstrap secret %q is changed", - namespace+"/"+helpers.HubKubeConfig, namespace+"/"+helpers.BootstrapHubKubeConfig)) + ctrlContext.Recorder().Eventf("HubKubeconfigSecretDeleted", fmt.Sprintf("the hub kubeconfig secret %s/%s is deleted due to %s", + namespace, helpers.HubKubeConfig, reason)) registrationName := fmt.Sprintf("%s-registration-agent", klusterletName) if err := k.kubeClient.AppsV1().Deployments(namespace).Delete(ctx, registrationName, metav1.DeleteOptions{}); err != nil { return err } - ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", - fmt.Sprintf("the deployment %q is deleted due to the bootstrap secret %q is changed", - namespace+"/"+registrationName, namespace+"/"+helpers.BootstrapHubKubeConfig)) + ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", fmt.Sprintf("the deployment %s/%s is deleted due to %s", + namespace, registrationName, reason)) workName := fmt.Sprintf("%s-work-agent", klusterletName) if err := k.kubeClient.AppsV1().Deployments(namespace).Delete(ctx, workName, metav1.DeleteOptions{}); err != nil { return err } - ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", - fmt.Sprintf("the deployment %q is deleted due to the bootstrap secret %q is changed", - namespace+"/"+workName, namespace+"/"+helpers.BootstrapHubKubeConfig)) + ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", fmt.Sprintf("the deployment %s/%s is deleted due to %s", + namespace, workName, reason)) return nil } @@ -186,3 +229,28 @@ func bootstrapSecretQueueKeyFunc(klusterletLister operatorlister.KlusterletListe return "" } } + +func isHubKubeconfigSecretExpired(secret *corev1.Secret) (bool, error) { + certData, ok := secret.Data[tlsCertFile] + if !ok { + return false, fmt.Errorf("there is no %q", tlsCertFile) + } + + certs, err := certutil.ParseCertsPEM(certData) + if err != nil { + return false, fmt.Errorf("failed to parse cert: %v", err) + } + + if len(certs) == 0 { + return false, fmt.Errorf("there are no certs in %q", tlsCertFile) + } + + now := time.Now() + for _, cert := range certs { + if now.After(cert.NotAfter) { + return true, nil + } + } + + return false, nil +} diff --git a/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go b/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go index 42c7b54ac..05663b3bd 100644 --- a/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go +++ b/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go @@ -2,6 +2,12 @@ package bootstrapcontroller import ( "context" + cryptorand "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "math/big" "testing" "time" @@ -19,6 +25,7 @@ import ( clienttesting "k8s.io/client-go/testing" clientcmdapi "k8s.io/client-go/tools/clientcmd/api" clientcmdlatest "k8s.io/client-go/tools/clientcmd/api/latest" + certutil "k8s.io/client-go/util/cert" ) func TestSync(t *testing.T) { @@ -37,6 +44,21 @@ func TestSync(t *testing.T) { } }, }, + { + name: "checking the hub kubeconfig secret", + queueKey: "test/test", + objects: []runtime.Object{ + newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.47:6443")), + newHubKubeConfigSecret("test", time.Now().Add(-60*time.Second).UTC()), + newDeployment("test-registration-agent", "test"), + newDeployment("test-work-agent", "test"), + }, + validateActions: func(t *testing.T, actions []clienttesting.Action) { + testinghelper.AssertDelete(t, actions[0], "secrets", "test", "hub-kubeconfig-secret") + testinghelper.AssertDelete(t, actions[1], "deployments", "test", "test-registration-agent") + testinghelper.AssertDelete(t, actions[2], "deployments", "test", "test-work-agent") + }, + }, { name: "the bootstrap is not started", queueKey: "test/test", @@ -52,7 +74,7 @@ func TestSync(t *testing.T) { queueKey: "test/test", objects: []runtime.Object{ newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.47:6443")), - newSecret("hub-kubeconfig-secret", "test", newKubeConfig("https://10.0.118.47:6443")), + newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()), }, validateActions: func(t *testing.T, actions []clienttesting.Action) { if len(actions) != 0 { @@ -64,8 +86,8 @@ func TestSync(t *testing.T) { name: "the bootstrap secret is changed", queueKey: "test/test", objects: []runtime.Object{ - newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.47:6443")), - newSecret("hub-kubeconfig-secret", "test", newKubeConfig("https://10.0.118.48:6443")), + newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.48:6443")), + newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()), newDeployment("test-registration-agent", "test"), newDeployment("test-work-agent", "test"), }, @@ -189,6 +211,62 @@ func newKubeConfig(host string) []byte { return configData } +func newHubKubeConfigSecret(namespace string, notAfter time.Time) *corev1.Secret { + caKey, err := rsa.GenerateKey(cryptorand.Reader, 2048) + if err != nil { + panic(err) + } + + caCert, err := certutil.NewSelfSignedCACert(certutil.Config{CommonName: "open-cluster-management.io"}, caKey) + if err != nil { + panic(err) + } + + key, err := rsa.GenerateKey(cryptorand.Reader, 2048) + if err != nil { + panic(err) + } + + certDERBytes, err := x509.CreateCertificate( + cryptorand.Reader, + &x509.Certificate{ + Subject: pkix.Name{ + CommonName: "test", + }, + SerialNumber: big.NewInt(1), + NotBefore: caCert.NotBefore, + NotAfter: notAfter, + KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, + }, + caCert, + key.Public(), + caKey, + ) + if err != nil { + panic(err) + } + + cert, err := x509.ParseCertificate(certDERBytes) + if err != nil { + panic(err) + } + + return &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hub-kubeconfig-secret", + Namespace: namespace, + }, + Data: map[string][]byte{ + "kubeconfig": newKubeConfig("https://10.0.118.47:6443"), + "tls.crt": pem.EncodeToMemory(&pem.Block{ + Type: certutil.CertificateBlockType, + Bytes: cert.Raw, + }), + }, + } +} + func newDeployment(name, namespace string) *appsv1.Deployment { return &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ diff --git a/test/integration/integration_suite_test.go b/test/integration/integration_suite_test.go index 2bf9ff422..282b6e33f 100644 --- a/test/integration/integration_suite_test.go +++ b/test/integration/integration_suite_test.go @@ -4,6 +4,7 @@ import ( "context" "path/filepath" "testing" + "time" "github.com/onsi/ginkgo" "github.com/onsi/gomega" @@ -19,6 +20,7 @@ import ( operatorclient "github.com/open-cluster-management/api/client/operator/clientset/versioned" operatorapiv1 "github.com/open-cluster-management/api/operator/v1" + "github.com/open-cluster-management/registration-operator/pkg/operators/klusterlet/controllers/bootstrapcontroller" ) func TestIntegration(t *testing.T) { @@ -47,6 +49,9 @@ var _ = ginkgo.BeforeSuite(func(done ginkgo.Done) { ginkgo.By("bootstrapping test environment") + // crank up the sync speed + bootstrapcontroller.BootstrapControllerSyncInterval = 2 * time.Second + var err error // install registration-operator CRDs and start a local kube-apiserver diff --git a/test/integration/klusterlet_test.go b/test/integration/klusterlet_test.go index e8c9cef84..62c2e6fdc 100644 --- a/test/integration/klusterlet_test.go +++ b/test/integration/klusterlet_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "strings" + "time" "github.com/onsi/ginkgo" "github.com/onsi/gomega" @@ -484,6 +485,7 @@ var _ = ginkgo.Describe("Klusterlet", func() { ginkgo.AfterEach(func() { operatorClient.OperatorV1().Klusterlets().Delete(context.Background(), klusterlet.Name, metav1.DeleteOptions{}) }) + ginkgo.It("should reload the klusterlet after the bootstrap secret is changed", func() { _, err := operatorClient.OperatorV1().Klusterlets().Create(context.Background(), klusterlet, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -509,6 +511,7 @@ var _ = ginkgo.Describe("Klusterlet", func() { } hubSecret.Data["cluster-name"] = []byte("testcluster") hubSecret.Data["kubeconfig"] = util.NewKubeConfig(restConfig.Host) + hubSecret.Data["tls.crt"] = util.NewCert(time.Now().Add(300 * time.Second).UTC()) if _, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Update(context.Background(), hubSecret, metav1.UpdateOptions{}); err != nil { return false } @@ -555,5 +558,71 @@ var _ = ginkgo.Describe("Klusterlet", func() { return true }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) }) + + ginkgo.It("should reload the klusterlet after the hub secret is expired", func() { + _, err := operatorClient.OperatorV1().Klusterlets().Create(context.Background(), klusterlet, metav1.CreateOptions{}) + gomega.Expect(err).NotTo(gomega.HaveOccurred()) + + // Create a bootstrap secret + bootStrapSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: helpers.BootstrapHubKubeConfig, + Namespace: klusterletNamespace, + }, + Data: map[string][]byte{ + "kubeconfig": util.NewKubeConfig(restConfig.Host), + }, + } + _, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Create(context.Background(), bootStrapSecret, metav1.CreateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Get the deployments + var registrationDeployment *appsv1.Deployment + var workDeployment *appsv1.Deployment + gomega.Eventually(func() bool { + if registrationDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{}); err != nil { + return false + } + if workDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{}); err != nil { + return false + } + return true + }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) + + // Update the hub secret and make it same with the bootstrap secret + gomega.Eventually(func() bool { + hubSecret, err := kubeClient.CoreV1().Secrets(klusterletNamespace).Get(context.Background(), helpers.HubKubeConfig, metav1.GetOptions{}) + if err != nil { + return false + } + hubSecret.Data["cluster-name"] = []byte("testcluster") + hubSecret.Data["kubeconfig"] = util.NewKubeConfig(restConfig.Host) + // the hub secret will be expired after 5 seconds + hubSecret.Data["tls.crt"] = util.NewCert(time.Now().Add(5 * time.Second).UTC()) + if _, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Update(context.Background(), hubSecret, metav1.UpdateOptions{}); err != nil { + return false + } + return true + }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) + + // Make sure the deployments are deleted and recreated + gomega.Eventually(func() bool { + lastRegistrationDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{}) + if err != nil { + return false + } + lastWorkDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{}) + if err != nil { + return false + } + if registrationDeployment.UID == lastRegistrationDeployment.UID { + return false + } + if workDeployment.UID == lastWorkDeployment.UID { + return false + } + return true + }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) + }) }) }) diff --git a/test/integration/util/util.go b/test/integration/util/util.go index f7db338c7..24c850e26 100644 --- a/test/integration/util/util.go +++ b/test/integration/util/util.go @@ -1,7 +1,14 @@ package util import ( + cryptorand "crypto/rand" + "crypto/rsa" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" "fmt" + "math/big" + "time" "github.com/onsi/ginkgo" @@ -10,6 +17,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" clientcmdapi "k8s.io/client-go/tools/clientcmd/api" clientcmdlatest "k8s.io/client-go/tools/clientcmd/api/latest" + certutil "k8s.io/client-go/util/cert" ) func NewIntegrationTestEventRecorder(componet string) events.Recorder { @@ -87,3 +95,49 @@ func NewKubeConfig(host string) []byte { }) return configData } + +func NewCert(notAfter time.Time) []byte { + caKey, err := rsa.GenerateKey(cryptorand.Reader, 2048) + if err != nil { + panic(err) + } + + caCert, err := certutil.NewSelfSignedCACert(certutil.Config{CommonName: "open-cluster-management.io"}, caKey) + if err != nil { + panic(err) + } + + key, err := rsa.GenerateKey(cryptorand.Reader, 2048) + if err != nil { + panic(err) + } + + certDERBytes, err := x509.CreateCertificate( + cryptorand.Reader, + &x509.Certificate{ + Subject: pkix.Name{ + CommonName: "test", + }, + SerialNumber: big.NewInt(1), + NotBefore: caCert.NotBefore, + NotAfter: notAfter, + KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, + }, + caCert, + key.Public(), + caKey, + ) + if err != nil { + panic(err) + } + + cert, err := x509.ParseCertificate(certDERBytes) + if err != nil { + panic(err) + } + return pem.EncodeToMemory(&pem.Block{ + Type: certutil.CertificateBlockType, + Bytes: cert.Raw, + }) +}