support to check hub seceret expired

Signed-off-by: liuwei <liuweixa@redhat.com>
This commit is contained in:
liuwei
2021-05-31 09:03:48 +00:00
parent 1057c3403b
commit 8db2ecf7e1
5 changed files with 305 additions and 31 deletions

View File

@@ -4,7 +4,7 @@ import (
"bytes"
"context"
"fmt"
"strings"
"time"
operatorinformer "github.com/open-cluster-management/api/client/operator/informers/externalversions/operator/v1"
operatorlister "github.com/open-cluster-management/api/client/operator/listers/operator/v1"
@@ -22,14 +22,22 @@ import (
coreinformer "k8s.io/client-go/informers/core/v1"
"k8s.io/client-go/kubernetes"
corelister "k8s.io/client-go/listers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/tools/clientcmd"
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
certutil "k8s.io/client-go/util/cert"
"k8s.io/klog/v2"
)
// bootstrapController watches bootstrap hub kubeconfig secret, if the secret is changed with hub kube-apiserver ca or apiserver
// endpoints, this controller will make the klusterlet re-bootstrap to get the new hub kubeconfig from hub cluster by deleting
// the current hub kubeconfig secret and restart the klusterlet agents
const tlsCertFile = "tls.crt"
// BootstrapControllerSyncInterval is exposed so that integration tests can crank up the constroller sync speed.
var BootstrapControllerSyncInterval = 5 * time.Minute
// bootstrapController watches bootstrap-hub-kubeconfig and hub-kubeconfig-secret secrets, if the bootstrap-hub-kubeconfig secret
// is changed with hub kube-apiserver ca or apiserver endpoints, or the hub-kubeconfig-secret secret is expired, this controller
// will make the klusterlet re-bootstrap to get the new hub kubeconfig from hub cluster by deleting the current hub kubeconfig
// secret and restart the klusterlet agents
type bootstrapController struct {
kubeClient kubernetes.Interface
klusterletLister operatorlister.KlusterletLister
@@ -49,6 +57,7 @@ func NewBootstrapController(
}
return factory.New().WithSync(controller.sync).
WithInformersQueueKeyFunc(bootstrapSecretQueueKeyFunc(controller.klusterletLister), secretInformer.Informer()).
ResyncEvery(BootstrapControllerSyncInterval).
ToController("BootstrapController", recorder)
}
@@ -58,15 +67,33 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor
return nil
}
keys := strings.Split(queueKey, "/")
if len(keys) != 2 {
// this should not happen, do nothing
klog.V(4).Infof("Reconciling klusterlet kubeconfig secrets %q", queueKey)
klusterletNamespace, klusterletName, err := cache.SplitMetaNamespaceKey(queueKey)
if err != nil {
// ignore bad format key
return nil
}
klusterletNamespace := keys[0]
klusterletName := keys[1]
klog.V(4).Infof("Reconciling bootstrap hub kubeconfig secret %q", klusterletNamespace+"/"+helpers.BootstrapHubKubeConfig)
// triggered by resync, checking whether the hub kubeconfig secret is expired
if klusterletNamespace == "" && klusterletName == factory.DefaultQueueKey {
klusterlets, err := k.klusterletLister.List(labels.Everything())
if err != nil {
return err
}
for _, klusterlet := range klusterlets {
namespace := klusterlet.Spec.Namespace
if namespace == "" {
namespace = helpers.KlusterletDefaultNamespace
}
// enqueue the klusterlet to reconcile
controllerContext.Queue().Add(fmt.Sprintf("%s/%s", namespace, klusterlet.Name))
}
return nil
}
bootstrapHubKubeconfigSecret, err := k.secretLister.Secrets(klusterletNamespace).Get(helpers.BootstrapHubKubeConfig)
switch {
@@ -81,14 +108,18 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor
if err != nil {
// a bad bootstrap secret, ignore it
controllerContext.Recorder().Warningf("BadBootstrapSecret",
fmt.Sprintf("unable to load hub kubeconfig from secret %q: %v", klusterletNamespace+"/"+helpers.BootstrapHubKubeConfig, err))
fmt.Sprintf("unable to load hub kubeconfig from secret %s/%s: %v", klusterletNamespace, helpers.BootstrapHubKubeConfig, err))
return nil
}
hubKubeconfigSecret, err := k.secretLister.Secrets(klusterletNamespace).Get(helpers.HubKubeConfig)
switch {
case errors.IsNotFound(err):
// the hub kubeconfig secret not found, could not have bootstrap yet, do nothing
// the hub kubeconfig secret not found, could not have bootstrap yet, do nothing currently
// TODO one case should be supported in the future: the bootstrap phase may be failed due to
// the content of bootstrap secret is wrong, this also results in the hub kubeconfig secret
// cannot be found. In this case, user may need to correct the bootstrap secret. we need to
// find a way to know the bootstrap secret is corrected, and then reload the klusterlet
return nil
case err != nil:
return err
@@ -98,46 +129,58 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor
if err != nil {
// the hub kubeconfig secret has errors, do nothing
controllerContext.Recorder().Warningf("BadHubKubeConfigSecret",
fmt.Sprintf("unable to load hub kubeconfig from secret %q: %v", klusterletNamespace+"/"+helpers.BootstrapHubKubeConfig, err))
fmt.Sprintf("unable to load hub kubeconfig from secret %s/%s: %v", klusterletNamespace, helpers.BootstrapHubKubeConfig, err))
return nil
}
// the CA and server are not changed in bootstrap kubeconfig secret, ignore this change
if bootstrapKubeconfig.Server == hubKubeconfig.Server &&
bytes.Equal(bootstrapKubeconfig.CertificateAuthorityData, hubKubeconfig.CertificateAuthorityData) {
if bootstrapKubeconfig.Server != hubKubeconfig.Server ||
!bytes.Equal(bootstrapKubeconfig.CertificateAuthorityData, hubKubeconfig.CertificateAuthorityData) {
// the bootstrap kubeconfig secret is changed, reload the klusterlet agents
reloadReason := fmt.Sprintf("the bootstrap secret %s/%s is changed", klusterletNamespace, helpers.BootstrapHubKubeConfig)
return k.reloadAgents(ctx, controllerContext, klusterletNamespace, klusterletName, reloadReason)
}
expired, err := isHubKubeconfigSecretExpired(hubKubeconfigSecret)
if err != nil {
// the hub kubeconfig secret has errors, do nothing
controllerContext.Recorder().Warningf("BadHubKubeConfigSecret",
fmt.Sprintf("the hub kubeconfig secret %s/%s is invalid: %v", klusterletNamespace, helpers.HubKubeConfig, err))
return nil
}
// the bootstrap kubeconfig secret is changed, reload the klusterlet agents
return k.reloadAgents(ctx, controllerContext, klusterletNamespace, klusterletName)
// the hub kubeconfig secret cert is not expired, do nothing
if !expired {
return nil
}
// the hub kubeconfig secret cert is expired, reload klusterlet to restart bootstrap
reloadReason := fmt.Sprintf("the hub kubeconfig secret %s/%s is expired", klusterletNamespace, helpers.HubKubeConfig)
return k.reloadAgents(ctx, controllerContext, klusterletNamespace, klusterletName, reloadReason)
}
// reloadAgents reload klusterlet agents by
// 1. make the registration agent re-bootstrap by deleting the current hub kubeconfig secret to
// 2. restart the registration and work agents to reload the new hub ca by deleting the agent deployments
func (k *bootstrapController) reloadAgents(ctx context.Context, ctrlContext factory.SyncContext, namespace, klusterletName string) error {
func (k *bootstrapController) reloadAgents(ctx context.Context, ctrlContext factory.SyncContext, namespace, klusterletName, reason string) error {
if err := k.kubeClient.CoreV1().Secrets(namespace).Delete(ctx, helpers.HubKubeConfig, metav1.DeleteOptions{}); err != nil {
return err
}
ctrlContext.Recorder().Eventf("HubKubeconfigSecretDeleted",
fmt.Sprintf("the hub kubeconfig secret %q is deleted due to the bootstrap secret %q is changed",
namespace+"/"+helpers.HubKubeConfig, namespace+"/"+helpers.BootstrapHubKubeConfig))
ctrlContext.Recorder().Eventf("HubKubeconfigSecretDeleted", fmt.Sprintf("the hub kubeconfig secret %s/%s is deleted due to %s",
namespace, helpers.HubKubeConfig, reason))
registrationName := fmt.Sprintf("%s-registration-agent", klusterletName)
if err := k.kubeClient.AppsV1().Deployments(namespace).Delete(ctx, registrationName, metav1.DeleteOptions{}); err != nil {
return err
}
ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted",
fmt.Sprintf("the deployment %q is deleted due to the bootstrap secret %q is changed",
namespace+"/"+registrationName, namespace+"/"+helpers.BootstrapHubKubeConfig))
ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", fmt.Sprintf("the deployment %s/%s is deleted due to %s",
namespace, registrationName, reason))
workName := fmt.Sprintf("%s-work-agent", klusterletName)
if err := k.kubeClient.AppsV1().Deployments(namespace).Delete(ctx, workName, metav1.DeleteOptions{}); err != nil {
return err
}
ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted",
fmt.Sprintf("the deployment %q is deleted due to the bootstrap secret %q is changed",
namespace+"/"+workName, namespace+"/"+helpers.BootstrapHubKubeConfig))
ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", fmt.Sprintf("the deployment %s/%s is deleted due to %s",
namespace, workName, reason))
return nil
}
@@ -186,3 +229,28 @@ func bootstrapSecretQueueKeyFunc(klusterletLister operatorlister.KlusterletListe
return ""
}
}
func isHubKubeconfigSecretExpired(secret *corev1.Secret) (bool, error) {
certData, ok := secret.Data[tlsCertFile]
if !ok {
return false, fmt.Errorf("there is no %q", tlsCertFile)
}
certs, err := certutil.ParseCertsPEM(certData)
if err != nil {
return false, fmt.Errorf("failed to parse cert: %v", err)
}
if len(certs) == 0 {
return false, fmt.Errorf("there are no certs in %q", tlsCertFile)
}
now := time.Now()
for _, cert := range certs {
if now.After(cert.NotAfter) {
return true, nil
}
}
return false, nil
}

View File

@@ -2,6 +2,12 @@ package bootstrapcontroller
import (
"context"
cryptorand "crypto/rand"
"crypto/rsa"
"crypto/x509"
"crypto/x509/pkix"
"encoding/pem"
"math/big"
"testing"
"time"
@@ -19,6 +25,7 @@ import (
clienttesting "k8s.io/client-go/testing"
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
clientcmdlatest "k8s.io/client-go/tools/clientcmd/api/latest"
certutil "k8s.io/client-go/util/cert"
)
func TestSync(t *testing.T) {
@@ -37,6 +44,21 @@ func TestSync(t *testing.T) {
}
},
},
{
name: "checking the hub kubeconfig secret",
queueKey: "test/test",
objects: []runtime.Object{
newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.47:6443")),
newHubKubeConfigSecret("test", time.Now().Add(-60*time.Second).UTC()),
newDeployment("test-registration-agent", "test"),
newDeployment("test-work-agent", "test"),
},
validateActions: func(t *testing.T, actions []clienttesting.Action) {
testinghelper.AssertDelete(t, actions[0], "secrets", "test", "hub-kubeconfig-secret")
testinghelper.AssertDelete(t, actions[1], "deployments", "test", "test-registration-agent")
testinghelper.AssertDelete(t, actions[2], "deployments", "test", "test-work-agent")
},
},
{
name: "the bootstrap is not started",
queueKey: "test/test",
@@ -52,7 +74,7 @@ func TestSync(t *testing.T) {
queueKey: "test/test",
objects: []runtime.Object{
newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.47:6443")),
newSecret("hub-kubeconfig-secret", "test", newKubeConfig("https://10.0.118.47:6443")),
newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()),
},
validateActions: func(t *testing.T, actions []clienttesting.Action) {
if len(actions) != 0 {
@@ -64,8 +86,8 @@ func TestSync(t *testing.T) {
name: "the bootstrap secret is changed",
queueKey: "test/test",
objects: []runtime.Object{
newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.47:6443")),
newSecret("hub-kubeconfig-secret", "test", newKubeConfig("https://10.0.118.48:6443")),
newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.48:6443")),
newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()),
newDeployment("test-registration-agent", "test"),
newDeployment("test-work-agent", "test"),
},
@@ -189,6 +211,62 @@ func newKubeConfig(host string) []byte {
return configData
}
func newHubKubeConfigSecret(namespace string, notAfter time.Time) *corev1.Secret {
caKey, err := rsa.GenerateKey(cryptorand.Reader, 2048)
if err != nil {
panic(err)
}
caCert, err := certutil.NewSelfSignedCACert(certutil.Config{CommonName: "open-cluster-management.io"}, caKey)
if err != nil {
panic(err)
}
key, err := rsa.GenerateKey(cryptorand.Reader, 2048)
if err != nil {
panic(err)
}
certDERBytes, err := x509.CreateCertificate(
cryptorand.Reader,
&x509.Certificate{
Subject: pkix.Name{
CommonName: "test",
},
SerialNumber: big.NewInt(1),
NotBefore: caCert.NotBefore,
NotAfter: notAfter,
KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
},
caCert,
key.Public(),
caKey,
)
if err != nil {
panic(err)
}
cert, err := x509.ParseCertificate(certDERBytes)
if err != nil {
panic(err)
}
return &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "hub-kubeconfig-secret",
Namespace: namespace,
},
Data: map[string][]byte{
"kubeconfig": newKubeConfig("https://10.0.118.47:6443"),
"tls.crt": pem.EncodeToMemory(&pem.Block{
Type: certutil.CertificateBlockType,
Bytes: cert.Raw,
}),
},
}
}
func newDeployment(name, namespace string) *appsv1.Deployment {
return &appsv1.Deployment{
ObjectMeta: metav1.ObjectMeta{

View File

@@ -4,6 +4,7 @@ import (
"context"
"path/filepath"
"testing"
"time"
"github.com/onsi/ginkgo"
"github.com/onsi/gomega"
@@ -19,6 +20,7 @@ import (
operatorclient "github.com/open-cluster-management/api/client/operator/clientset/versioned"
operatorapiv1 "github.com/open-cluster-management/api/operator/v1"
"github.com/open-cluster-management/registration-operator/pkg/operators/klusterlet/controllers/bootstrapcontroller"
)
func TestIntegration(t *testing.T) {
@@ -47,6 +49,9 @@ var _ = ginkgo.BeforeSuite(func(done ginkgo.Done) {
ginkgo.By("bootstrapping test environment")
// crank up the sync speed
bootstrapcontroller.BootstrapControllerSyncInterval = 2 * time.Second
var err error
// install registration-operator CRDs and start a local kube-apiserver

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"strings"
"time"
"github.com/onsi/ginkgo"
"github.com/onsi/gomega"
@@ -484,6 +485,7 @@ var _ = ginkgo.Describe("Klusterlet", func() {
ginkgo.AfterEach(func() {
operatorClient.OperatorV1().Klusterlets().Delete(context.Background(), klusterlet.Name, metav1.DeleteOptions{})
})
ginkgo.It("should reload the klusterlet after the bootstrap secret is changed", func() {
_, err := operatorClient.OperatorV1().Klusterlets().Create(context.Background(), klusterlet, metav1.CreateOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
@@ -509,6 +511,7 @@ var _ = ginkgo.Describe("Klusterlet", func() {
}
hubSecret.Data["cluster-name"] = []byte("testcluster")
hubSecret.Data["kubeconfig"] = util.NewKubeConfig(restConfig.Host)
hubSecret.Data["tls.crt"] = util.NewCert(time.Now().Add(300 * time.Second).UTC())
if _, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Update(context.Background(), hubSecret, metav1.UpdateOptions{}); err != nil {
return false
}
@@ -555,5 +558,71 @@ var _ = ginkgo.Describe("Klusterlet", func() {
return true
}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
})
ginkgo.It("should reload the klusterlet after the hub secret is expired", func() {
_, err := operatorClient.OperatorV1().Klusterlets().Create(context.Background(), klusterlet, metav1.CreateOptions{})
gomega.Expect(err).NotTo(gomega.HaveOccurred())
// Create a bootstrap secret
bootStrapSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: helpers.BootstrapHubKubeConfig,
Namespace: klusterletNamespace,
},
Data: map[string][]byte{
"kubeconfig": util.NewKubeConfig(restConfig.Host),
},
}
_, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Create(context.Background(), bootStrapSecret, metav1.CreateOptions{})
gomega.Expect(err).ToNot(gomega.HaveOccurred())
// Get the deployments
var registrationDeployment *appsv1.Deployment
var workDeployment *appsv1.Deployment
gomega.Eventually(func() bool {
if registrationDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{}); err != nil {
return false
}
if workDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{}); err != nil {
return false
}
return true
}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
// Update the hub secret and make it same with the bootstrap secret
gomega.Eventually(func() bool {
hubSecret, err := kubeClient.CoreV1().Secrets(klusterletNamespace).Get(context.Background(), helpers.HubKubeConfig, metav1.GetOptions{})
if err != nil {
return false
}
hubSecret.Data["cluster-name"] = []byte("testcluster")
hubSecret.Data["kubeconfig"] = util.NewKubeConfig(restConfig.Host)
// the hub secret will be expired after 5 seconds
hubSecret.Data["tls.crt"] = util.NewCert(time.Now().Add(5 * time.Second).UTC())
if _, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Update(context.Background(), hubSecret, metav1.UpdateOptions{}); err != nil {
return false
}
return true
}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
// Make sure the deployments are deleted and recreated
gomega.Eventually(func() bool {
lastRegistrationDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{})
if err != nil {
return false
}
lastWorkDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{})
if err != nil {
return false
}
if registrationDeployment.UID == lastRegistrationDeployment.UID {
return false
}
if workDeployment.UID == lastWorkDeployment.UID {
return false
}
return true
}, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue())
})
})
})

View File

@@ -1,7 +1,14 @@
package util
import (
cryptorand "crypto/rand"
"crypto/rsa"
"crypto/x509"
"crypto/x509/pkix"
"encoding/pem"
"fmt"
"math/big"
"time"
"github.com/onsi/ginkgo"
@@ -10,6 +17,7 @@ import (
"k8s.io/apimachinery/pkg/runtime"
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
clientcmdlatest "k8s.io/client-go/tools/clientcmd/api/latest"
certutil "k8s.io/client-go/util/cert"
)
func NewIntegrationTestEventRecorder(componet string) events.Recorder {
@@ -87,3 +95,49 @@ func NewKubeConfig(host string) []byte {
})
return configData
}
func NewCert(notAfter time.Time) []byte {
caKey, err := rsa.GenerateKey(cryptorand.Reader, 2048)
if err != nil {
panic(err)
}
caCert, err := certutil.NewSelfSignedCACert(certutil.Config{CommonName: "open-cluster-management.io"}, caKey)
if err != nil {
panic(err)
}
key, err := rsa.GenerateKey(cryptorand.Reader, 2048)
if err != nil {
panic(err)
}
certDERBytes, err := x509.CreateCertificate(
cryptorand.Reader,
&x509.Certificate{
Subject: pkix.Name{
CommonName: "test",
},
SerialNumber: big.NewInt(1),
NotBefore: caCert.NotBefore,
NotAfter: notAfter,
KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
},
caCert,
key.Public(),
caKey,
)
if err != nil {
panic(err)
}
cert, err := x509.ParseCertificate(certDERBytes)
if err != nil {
panic(err)
}
return pem.EncodeToMemory(&pem.Block{
Type: certutil.CertificateBlockType,
Bytes: cert.Raw,
})
}