Files
open-cluster-management/pkg/registration/register/csr/csr.go
Yang Le 9d1a993e2c
Some checks failed
Post / images (amd64, placement) (push) Failing after 50s
Post / images (amd64, registration) (push) Failing after 41s
Post / images (amd64, registration-operator) (push) Failing after 40s
Post / images (amd64, work) (push) Failing after 44s
Post / images (arm64, addon-manager) (push) Failing after 41s
Post / images (arm64, placement) (push) Failing after 42s
Post / images (arm64, registration) (push) Failing after 41s
Post / images (amd64, addon-manager) (push) Failing after 5m29s
Post / images (arm64, registration-operator) (push) Failing after 43s
Post / images (arm64, work) (push) Failing after 5m35s
Post / image manifest (addon-manager) (push) Has been skipped
Post / image manifest (placement) (push) Has been skipped
Post / image manifest (registration) (push) Has been skipped
Post / image manifest (registration-operator) (push) Has been skipped
Post / image manifest (work) (push) Has been skipped
Post / trigger clusteradm e2e (push) Has been skipped
Post / coverage (push) Failing after 40m4s
Scorecard supply-chain security / Scorecard analysis (push) Failing after 4m1s
add token driver for addon registration (#1343)
Signed-off-by: Yang Le <yangle@redhat.com>
2026-01-28 05:41:52 +00:00

621 lines
20 KiB
Go

package csr
import (
"context"
"crypto/tls"
"crypto/x509/pkix"
"errors"
"fmt"
"math/rand"
"os"
"path"
"strings"
"time"
certificates "k8s.io/api/certificates/v1"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/equality"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/cache"
clientcmdapi "k8s.io/client-go/tools/clientcmd/api"
certutil "k8s.io/client-go/util/cert"
"k8s.io/client-go/util/keyutil"
"k8s.io/klog/v2"
"k8s.io/utils/pointer"
addonv1alpha1 "open-cluster-management.io/api/addon/v1alpha1"
clusterv1 "open-cluster-management.io/api/cluster/v1"
"open-cluster-management.io/sdk-go/pkg/basecontroller/events"
"open-cluster-management.io/sdk-go/pkg/basecontroller/factory"
"open-cluster-management.io/ocm/pkg/registration/hub/user"
"open-cluster-management.io/ocm/pkg/registration/register"
"open-cluster-management.io/ocm/pkg/registration/register/token"
)
// Secret keys, condition type, informer index names and CSR thresholds used by the CSR driver.
const (
	// TLSKeyFile is the name of the TLS key file in the kubeconfig secret.
	TLSKeyFile = "tls.key"
	// TLSCertFile is the name of the TLS cert file in the kubeconfig secret.
	TLSCertFile = "tls.crt"
	// ClusterCertificateRotatedCondition is the condition type reporting that the client certificate is rotated.
	ClusterCertificateRotatedCondition = "ClusterCertificateRotated"
	// indexByCluster is the informer index name registered together with indexByClusterFunc.
	indexByCluster = "indexByCluster"
	// indexByAddon is the informer index name registered together with indexByAddonFunc.
	indexByAddon = "indexByAddon"
	// TODO(qiujian16) expose it if necessary in the future.
	// NOTE(review): presumably the number of existing CSRs per cluster at which creation is halted;
	// the consumer is not in this part of the file — confirm.
	clusterCSRThreshold = 10
	// NOTE(review): presumably the per-addon counterpart of clusterCSRThreshold — confirm.
	addonCSRThreshold = 10
)
// CSRDriver obtains and rotates a client certificate by creating a CertificateSigningRequest
// and waiting for it to be approved and issued. It also keeps the state of the CSR that is
// currently in flight.
type CSRDriver struct {
	// csrName is the name of the csr created by the controller and waiting for approval.
	csrName string
	// keyData is the private key data used to create the csr.
	// csrName and keyData store the internal state of the controller. They are set after the controller
	// creates a new csr and cleared once the csr is approved and processed by the controller.
	// There are 4 combinations of their values:
	// 1. csrName empty, keyData empty: we aren't trying to create a new client cert, our current one is valid
	// 2. csrName set, keyData empty: this is a bug
	// 3. csrName set, keyData set: we are waiting for a new cert to be signed.
	// 4. csrName empty, keyData set: the CSR failed to create, this shouldn't happen, it's a bug.
	keyData []byte
	// csrControl is used to create csrs and to query their approval state and issued certificate.
	csrControl CSRControl
	// addonClients holds the addon clients and informers (for addon driver only)
	addonClients *register.AddOnClients
	// tokenControl is used for token-based addon authentication
	tokenControl token.TokenControl
	// haltCSRCreation reports whether csr creation should be halted; Process consults it
	// before creating a new csr.
	haltCSRCreation func() bool
	// opt is the csr configuration; Process reads the requested certificate expiration from it.
	opt register.CSRConfiguration
	// csrOption carries the object meta, subject, DNS names, signer name and event filter
	// used for the csrs created by this driver.
	csrOption *CSROption
}
// Process runs one reconciliation step of the client certificate lifecycle.
//
// If a csr created by a previous call is still pending (c.csrName is set), Process checks
// whether it has been approved and a certificate has been issued:
//   - not approved or not issued yet: returns (nil, nil, nil) and keeps waiting;
//   - lookup failure, missing private key, or key/certificate mismatch: the pending state is
//     cleared and (secret, failed condition, err) is returned;
//   - certificate issued and matching the key: the certificate and key are written into
//     secret.Data under TLSCertFile/TLSKeyFile, the pending state is cleared, and the secret
//     is returned with a ConditionTrue condition. If the validity period of the stored
//     certificate cannot be read, a failed condition and the error are returned instead.
//
// Otherwise Process decides via shouldCreateCSR whether a new certificate is required:
//   - shouldCreateCSR fails: returns (secret, nil, err);
//   - no certificate is required: returns (nil, nil, nil);
//   - csr creation is halted: returns (nil, failed condition, nil);
//   - csr creation fails: returns (nil, failed condition, err);
//   - csr created: the private key and csr name are remembered for the next call and
//     (nil, nil, nil) is returned.
func (c *CSRDriver) Process(
	ctx context.Context, controllerName string, secret *corev1.Secret, additionalSecretData map[string][]byte,
	recorder events.Recorder) (*corev1.Secret, *metav1.Condition, error) {
	logger := klog.FromContext(ctx)

	// failedCondition builds the condition shared by every failure path of this method.
	failedCondition := func(message string) *metav1.Condition {
		return &metav1.Condition{
			Type:    "ClusterCertificateRotated",
			Status:  metav1.ConditionFalse,
			Reason:  "ClientCertificateUpdateFailed",
			Message: message,
		}
	}

	// reconcile pending csr if exists
	if len(c.csrName) > 0 {
		// build a secret data map if the csr is approved and its certificate is issued
		newSecretConfig, err := func() (map[string][]byte, error) {
			// skip if csr is not approved yet
			isApproved, err := c.csrControl.IsApproved(c.csrName)
			if err != nil {
				return nil, err
			}
			if !isApproved {
				return nil, nil
			}

			// skip if csr is not issued
			certData, err := c.csrControl.GetIssuedCertificate(c.csrName)
			if err != nil {
				return nil, err
			}
			if len(certData) == 0 {
				return nil, nil
			}

			logger.Info("Sync csr", "name", c.csrName)

			// check if cert in csr status matches with the corresponding private key
			if c.keyData == nil {
				return nil, fmt.Errorf("no private key found for certificate in csr: %s", c.csrName)
			}
			if _, err := tls.X509KeyPair(certData, c.keyData); err != nil {
				return nil, fmt.Errorf("private key does not match with the certificate in csr: %s", c.csrName)
			}

			return map[string][]byte{
				TLSCertFile: certData,
				TLSKeyFile:  c.keyData,
			}, nil
		}()
		if err != nil {
			c.reset()
			return secret, failedCondition(fmt.Sprintf("Failed to rotated client certificate %v", err)), err
		}
		if len(newSecretConfig) == 0 {
			// csr is not approved or not issued yet, keep waiting
			return nil, nil, nil
		}

		// store the new certificate and key in the client certificate secret;
		// a secret without any data has a nil map and writing to it would panic.
		if secret.Data == nil {
			secret.Data = make(map[string][]byte, len(newSecretConfig))
		}
		for k, v := range newSecretConfig {
			secret.Data[k] = v
		}

		// the pending csr is consumed regardless of whether the validity check below succeeds
		c.reset()

		// the validity pointers are only dereferenced once the error has been ruled out
		notBefore, notAfter, err := getCertValidityPeriod(secret)
		if err != nil {
			return secret, failedCondition(fmt.Sprintf("Failed to rotated client certificate %v", err)), err
		}

		recorder.Eventf(ctx, "ClientCertificateCreated", "A new client certificate for %s is available", controllerName)
		return secret, &metav1.Condition{
			Type:    "ClusterCertificateRotated",
			Status:  metav1.ConditionTrue,
			Reason:  "ClientCertificateUpdated",
			Message: fmt.Sprintf("client certificate rotated starting from %v to %v", *notBefore, *notAfter),
		}, nil
	}

	// create a csr to request new client certificate if
	// a. there is no valid client certificate issued for the current cluster/agent;
	// b. client certificate is sensitive to the additional secret data and the data changes;
	// c. client certificate exists and has less than a random percentage range from 20% to 25% of its life remaining;
	shouldCreate, err := shouldCreateCSR(
		ctx,
		logger,
		controllerName,
		secret,
		recorder,
		c.csrOption.Subject,
		additionalSecretData)
	if err != nil {
		return secret, nil, err
	}
	if !shouldCreate {
		return nil, nil, nil
	}

	if c.haltCSRCreation() {
		recorder.Eventf(ctx, "ClientCertificateCreationHalted",
			"Stop creating CSR for %s since there are too many CSRs created already on the hub.", controllerName)
		return nil, failedCondition("Stop creating csr since there are too many csr created already on hub"), nil
	}

	keyData, createdCSRName, err := func() ([]byte, string, error) {
		// create a new private key
		keyData, err := keyutil.MakeEllipticPrivateKeyPEM()
		if err != nil {
			return nil, "", err
		}
		privateKey, err := keyutil.ParsePrivateKeyPEM(keyData)
		if err != nil {
			return nil, "", fmt.Errorf("invalid private key for certificate request: %w", err)
		}
		csrData, err := certutil.MakeCSR(privateKey, c.csrOption.Subject, c.csrOption.DNSNames, nil)
		if err != nil {
			return nil, "", fmt.Errorf("unable to generate certificate request: %w", err)
		}

		// do not set expiration second if it is 0
		var expirationSeconds *int32
		if seconds := c.opt.GetExpirationSeconds(); seconds != 0 {
			expirationSeconds = pointer.Int32(seconds)
		}

		createdCSRName, err := c.csrControl.Create(
			ctx, recorder, c.csrOption.ObjectMeta, csrData, c.csrOption.SignerName, expirationSeconds)
		if err != nil {
			return nil, "", err
		}
		return keyData, createdCSRName, nil
	}()
	if err != nil {
		return nil, failedCondition(fmt.Sprintf("Failed to create CSR %v", err)), err
	}

	// remember the in-flight csr so that the next call reconciles it
	c.keyData = keyData
	c.csrName = createdCSRName
	return nil, nil, nil
}
// reset forgets the in-flight csr by clearing both its name and its private key.
func (c *CSRDriver) reset() {
	c.csrName, c.keyData = "", nil
}
// BuildKubeConfigFromTemplate replaces the auth infos of kubeConfig with a single entry,
// keyed by register.DefaultKubeConfigAuth, that points at the TLS cert and key files.
// The given config is modified in place and returned.
func (c *CSRDriver) BuildKubeConfigFromTemplate(kubeConfig *clientcmdapi.Config) *clientcmdapi.Config {
	certAuth := &clientcmdapi.AuthInfo{
		ClientCertificate: TLSCertFile,
		ClientKey:         TLSKeyFile,
	}
	kubeConfig.AuthInfos = map[string]*clientcmdapi.AuthInfo{
		register.DefaultKubeConfigAuth: certAuth,
	}
	return kubeConfig
}
// InformerHandler returns the informer of the csr control together with the event filter
// configured in the csr option.
func (c *CSRDriver) InformerHandler() (cache.SharedIndexInformer, factory.EventFilterFunc) {
	csrInformer := c.csrControl.Informer()
	eventFilter := c.csrOption.EventFilterFunc
	return csrInformer, eventFilter
}
// IsHubKubeConfigValid checks the TLS key/cert files stored in the hub kubeconfig directory.
// It returns false (without an error) when the key file does not exist, when the cert file
// cannot be read, or, if both cluster name and agent name are set, when the names cannot be
// extracted from the certificate or differ from the expected ones. Otherwise the result of
// IsCertificateValid on the certificate data is returned.
func (c *CSRDriver) IsHubKubeConfigValid(ctx context.Context, secretOption register.SecretOption) (bool, error) {
	logger := klog.FromContext(ctx)
	hubDir := secretOption.HubKubeconfigDir

	keyPath := path.Join(hubDir, TLSKeyFile)
	_, statErr := os.Stat(keyPath)
	if os.IsNotExist(statErr) {
		logger.V(4).Info("TLS key file not found", "keyPath", keyPath)
		return false, nil
	}

	certPath := path.Join(hubDir, TLSCertFile)
	certData, readErr := os.ReadFile(path.Clean(certPath))
	if readErr != nil {
		logger.V(4).Info("Unable to load TLS cert file", "certPath", certPath)
		return false, nil
	}

	// the identity check only applies when both clustername and agentname are set
	wantCluster, wantAgent := secretOption.ClusterName, secretOption.AgentName
	if len(wantCluster) == 0 || len(wantAgent) == 0 {
		return IsCertificateValid(logger, certData, nil)
	}

	// check if the tls certificate is issued for the current cluster/agent
	gotCluster, gotAgent, err := GetClusterAgentNamesFromCertificate(certData)
	if err != nil {
		return false, nil
	}
	if gotCluster == wantCluster && gotAgent == wantAgent {
		return IsCertificateValid(logger, certData, nil)
	}

	logger.V(4).Info("Certificate in file is issued for different agent",
		"certPath", certPath,
		"issuedFor", fmt.Sprintf("%s:%s", gotCluster, gotAgent),
		"expectedFor", fmt.Sprintf("%s:%s", wantCluster, wantAgent))
	return false, nil
}
// ManagedClusterDecorator returns the given cluster unchanged; the CSR driver does not
// decorate the ManagedCluster.
func (c *CSRDriver) ManagedClusterDecorator(cluster *clusterv1.ManagedCluster) *clusterv1.ManagedCluster {
	return cluster
}
// Fork creates the registration driver for the addon named addonName.
// A token driver is returned when token.TryForkTokenDriver yields one; otherwise a CSR-based
// addon driver sharing this driver's csr control is built from the CSR configuration of
// authConfig. An error is returned when the token helper fails or when that CSR
// configuration is nil.
func (c *CSRDriver) Fork(addonName string, authConfig register.AddonAuthConfig, secretOption register.SecretOption) (register.RegisterDriver, error) {
	// Token-based authentication takes precedence (shared helper)
	forked, err := token.TryForkTokenDriver(addonName, authConfig, secretOption, c.tokenControl, c.addonClients)
	switch {
	case err != nil:
		return nil, err
	case forked != nil:
		return forked, nil
	}

	// Fall back to a CSR-based driver. This handles:
	// - CustomSigner type (secretOption.Signer != KubeAPIServerClientSignerName)
	// - KubeClient type with CSR authentication
	// The CSR configuration comes from AddonAuthConfig (type-safe interface)
	if csrConfig := authConfig.GetCSRConfiguration(); csrConfig != nil {
		return NewCSRDriverForAddOn(addonName, csrConfig, secretOption, c.csrControl), nil
	}
	return nil, fmt.Errorf("CSR configuration is nil for addon %s", addonName)
}
// BuildClients builds the clients from the kubeconfig described by secretOption and wires a
// CSR control, backed by an informer factory restricted to objects labelled with the cluster
// name, into the driver. When bootstrap is false, the addon clients and the token control
// are initialized as well. The built clients are returned.
func (c *CSRDriver) BuildClients(ctx context.Context, secretOption register.SecretOption, bootstrap bool) (*register.Clients, error) {
	logger := klog.FromContext(ctx)

	kubeConfig, err := register.KubeConfigFromSecretOption(secretOption, bootstrap)
	if err != nil {
		return nil, err
	}

	clients, err := register.BuildClientsFromConfig(kubeConfig, secretOption.ClusterName)
	if err != nil {
		return nil, err
	}

	kubeClient, err := kubernetes.NewForConfig(kubeConfig)
	if err != nil {
		return nil, err
	}

	// only list/watch objects labelled with the current cluster name
	clusterSelector := fmt.Sprintf("%s=%s", clusterv1.ClusterNameLabelKey, secretOption.ClusterName)
	restrictToCluster := func(listOptions *metav1.ListOptions) {
		listOptions.LabelSelector = clusterSelector
	}
	informerFactory := informers.NewSharedInformerFactoryWithOptions(
		kubeClient,
		10*time.Minute,
		informers.WithTweakListOptions(restrictToCluster),
	)

	control, err := NewCSRControl(logger, informerFactory.Certificates(), kubeClient)
	if err != nil {
		return nil, fmt.Errorf("failed to create CSR control: %w", err)
	}
	if err := c.SetCSRControl(control, secretOption.ClusterName); err != nil {
		return nil, err
	}

	if bootstrap {
		return clients, nil
	}

	// addon clients and token control are only needed for addon mode after bootstrap
	c.addonClients = &register.AddOnClients{
		AddonClient:   clients.AddonClient,
		AddonInformer: clients.AddonInformer,
	}
	c.tokenControl = token.NewTokenControl(kubeClient.CoreV1())
	return clients, nil
}
// SetCSRControl registers the by-cluster and by-addon indexers on the informer of csrControl,
// stores csrControl on the driver and derives the halt function for clusterName from the
// informer's indexer. The first indexer registration error is returned as is, in which case
// the driver is left untouched.
func (c *CSRDriver) SetCSRControl(csrControl CSRControl, clusterName string) error {
	// each set is registered with its own call, cluster index first
	indexerSets := []cache.Indexers{
		{indexByCluster: indexByClusterFunc},
		{indexByAddon: indexByAddonFunc},
	}
	for _, indexers := range indexerSets {
		if err := csrControl.Informer().AddIndexers(indexers); err != nil {
			return err
		}
	}

	c.csrControl = csrControl
	c.haltCSRCreation = haltCSRCreationFunc(csrControl.Informer().GetIndexer(), clusterName)
	return nil
}
// SetAddonClients sets the addon clients for the CSR driver.
// The stored value is handed to token.TryForkTokenDriver when the driver is forked for an addon.
func (c *CSRDriver) SetAddonClients(addonClients *register.AddOnClients) {
	c.addonClients = addonClients
}
// SetTokenControl sets the token control for the CSR driver.
// The stored value is handed to token.TryForkTokenDriver when the driver is forked for an addon.
func (c *CSRDriver) SetTokenControl(tokenControl token.TokenControl) {
	c.tokenControl = tokenControl
}
// Compile-time checks that *CSRDriver implements the driver interfaces.
var (
	_ register.RegisterDriver     = (*CSRDriver)(nil)
	_ register.AddonDriverFactory = (*CSRDriver)(nil)
)
// NewCSRDriverForAddOn builds a CSRDriver for the registration of the addon addonName.
// CSRs created by the driver get an "addon-<cluster>-<addon>-" generated name, are labelled
// with the cluster and addon names, use the subject and signer from secretOption and carry a
// DNS name derived from the addon name. The driver reuses the given csrControl.
func NewCSRDriverForAddOn(addonName string, csrConfig register.CSRConfiguration, secretOption register.SecretOption, csrControl CSRControl) *CSRDriver {
	clusterName, signerName := secretOption.ClusterName, secretOption.Signer

	csrMeta := metav1.ObjectMeta{
		GenerateName: fmt.Sprintf("addon-%s-%s-", clusterName, addonName),
		// the labels are only hints. Anyone could set/modify them.
		Labels: map[string]string{
			clusterv1.ClusterNameLabelKey: clusterName,
			addonv1alpha1.AddonLabelKey:   addonName,
		},
	}
	addonOption := &CSROption{
		ObjectMeta:      csrMeta,
		Subject:         secretOption.Subject,
		DNSNames:        []string{fmt.Sprintf("%s.addon.open-cluster-management.io", addonName)},
		SignerName:      signerName,
		EventFilterFunc: createCSREventFilterFunc(clusterName, addonName, signerName),
	}

	halt := haltAddonCSRCreationFunc(csrControl.Informer().GetIndexer(), clusterName, addonName)
	return &CSRDriver{
		csrOption:       addonOption,
		opt:             csrConfig,
		csrControl:      csrControl,
		haltCSRCreation: halt,
	}
}
// NewCSRDriver builds the CSRDriver used for cluster registration.
// The signer defaults to the kube-apiserver client signer when secretOpts.Signer is empty;
// with that signer a bootstrap kubeconfig file is mandatory and its absence is an error.
// CSRs created by the driver get a "<cluster>-" generated name, a cluster-name label and a
// subject derived from the cluster and agent names. The event filter only accepts objects
// that carry the cluster label, carry no addon label and whose name starts with "<cluster>-".
func NewCSRDriver(csrConfig register.CSRConfiguration, secretOpts register.SecretOption) (*CSRDriver, error) {
	signer := secretOpts.Signer
	if signer == "" {
		signer = certificates.KubeAPIServerClientSignerName
	}

	// bootstrapKubeConfigFile is required when the signer is kubeclient
	if signer == certificates.KubeAPIServerClientSignerName && len(secretOpts.BootStrapKubeConfigFile) == 0 {
		return nil, errors.New("bootstrap-kubeconfig is required")
	}

	clusterName := secretOpts.ClusterName
	namePrefix := fmt.Sprintf("%s-", clusterName)

	subject := &pkix.Name{
		Organization: []string{
			fmt.Sprintf("%s%s", user.SubjectPrefix, clusterName),
			user.ManagedClustersGroup,
		},
		CommonName: fmt.Sprintf("%s%s:%s", user.SubjectPrefix, clusterName, secretOpts.AgentName),
	}

	clusterCSRFilter := func(obj interface{}) bool {
		accessor, err := meta.Accessor(obj)
		if err != nil {
			return false
		}
		labels := accessor.GetLabels()
		// only enqueue csr from a specific managed cluster, and never one carrying the addon key
		_, isAddonCSR := labels[addonv1alpha1.AddonLabelKey]
		if isAddonCSR || labels[clusterv1.ClusterNameLabelKey] != clusterName {
			return false
		}
		// only enqueue csr whose name starts with the cluster name
		return strings.HasPrefix(accessor.GetName(), namePrefix)
	}

	return &CSRDriver{
		opt: csrConfig,
		csrOption: &CSROption{
			ObjectMeta: metav1.ObjectMeta{
				GenerateName: namePrefix,
				// the label is only a hint for the cluster name. Anyone could set/modify it.
				Labels: map[string]string{
					clusterv1.ClusterNameLabelKey: clusterName,
				},
			},
			Subject:         subject,
			SignerName:      signer,
			EventFilterFunc: clusterCSRFilter,
		},
	}, nil
}
// shouldCreateCSR reports whether a new csr has to be created for the client certificate
// stored in secret. A csr is required when
//
//	a. there is no valid client certificate issued for the current cluster/agent
//	   (a validation error is treated the same way);
//	b. the client certificate is sensitive to the additional secret data and the data changes;
//	c. the client certificate exists and has less than a random percentage, picked from the
//	   range 20% to 25%, of its life remaining.
//
// Each positive decision is recorded as an event on recorder. An error is only returned when
// the validity period of an otherwise valid certificate cannot be determined.
func shouldCreateCSR(
	ctx context.Context,
	logger klog.Logger,
	controllerName string,
	secret *corev1.Secret,
	recorder events.Recorder,
	subject *pkix.Name,
	additionalSecretData map[string][]byte) (bool, error) {
	// a. there is no valid client certificate issued for the current cluster/agent
	valid, err := IsCertificateValid(logger, secret.Data[TLSCertFile], subject)
	if err != nil {
		recorder.Eventf(ctx, "CertificateValidationFailed", "Failed to validate client certificate for %s: %v", controllerName, err)
		return true, nil
	}
	if !valid {
		recorder.Eventf(ctx, "NoValidCertificateFound", "No valid client certificate for %s is found. Bootstrap is required", controllerName)
		return true, nil
	}

	// b. client certificate is sensitive to the additional secret data and the data changes
	if err := hasAdditionalSecretData(additionalSecretData, secret); err != nil {
		recorder.Eventf(ctx, "AdditonalSecretDataChanged", "The additional secret data is changed for %v. Re-create the client certificate for %s", err, controllerName)
		return true, nil
	}

	// c. client certificate exists and has less than a random percentage range from 20% to 25% of its life remaining
	notBefore, notAfter, err := getCertValidityPeriod(secret)
	if err != nil {
		return false, err
	}

	total := notAfter.Sub(*notBefore)
	remaining := time.Until(*notAfter)
	remainingRatio := remaining.Seconds() / total.Seconds()
	// logged once; both outcomes below share the same figures
	logger.V(4).Info("Client certificate for:", "name", controllerName, "time total", total,
		"remaining", remaining, "remaining/total", remainingRatio)

	threshold := jitter(0.2, 0.25)
	if remainingRatio > threshold {
		// Do nothing if the client certificate is valid and has more than a random percentage range from 20% to 25% of its life remaining
		return false, nil
	}

	recorder.Eventf(ctx, "CertificateRotationStarted",
		"The current client certificate for %s expires in %v. Start certificate rotation",
		controllerName, remaining.Round(time.Second))
	return true, nil
}
// hasAdditionalSecretData checks if the secret includes the expected additional secret data.
// It returns nil when every key of additionalSecretData is present in secret.Data with a
// semantically equal value, and an error naming the first offending key otherwise.
func hasAdditionalSecretData(additionalSecretData map[string][]byte, secret *corev1.Secret) error {
	secretRef := secret.Namespace + "/" + secret.Name
	for key, expected := range additionalSecretData {
		actual, found := secret.Data[key]
		switch {
		case !found:
			return fmt.Errorf("key %q not found in secret %q", key, secretRef)
		case !equality.Semantic.DeepEqual(expected, actual):
			return fmt.Errorf("key %q in secret %q does not match the expected value",
				key, secretRef)
		}
	}
	return nil
}
// jitter returns percentage increased by a random fraction of itself: the result is
// percentage + percentage*r*maxFactor with r drawn uniformly from [0, 1).
// A maxFactor that is zero or negative is replaced by 1.0.
func jitter(percentage float64, maxFactor float64) float64 {
	factor := maxFactor
	if factor <= 0.0 {
		factor = 1.0
	}
	return percentage + percentage*rand.Float64()*factor //#nosec G404
}
// indexByAddonFunc is an informer index function keyed by "<cluster>/<addon>".
// Objects missing either the cluster-name label or the addon label yield an empty,
// non-nil key list; an object without accessible metadata yields an error.
func indexByAddonFunc(obj interface{}) ([]string, error) {
	accessor, err := meta.Accessor(obj)
	if err != nil {
		return nil, err
	}

	labels := accessor.GetLabels()
	cluster, hasCluster := labels[clusterv1.ClusterNameLabelKey]
	addon, hasAddon := labels[addonv1alpha1.AddonLabelKey]
	if !hasCluster || !hasAddon {
		return []string{}, nil
	}
	return []string{fmt.Sprintf("%s/%s", cluster, addon)}, nil
}
// indexByClusterFunc is an informer index function keyed by the cluster name.
// Only objects that carry the cluster-name label and no addon label are indexed; all
// others yield an empty, non-nil key list. An object without accessible metadata yields
// an error.
func indexByClusterFunc(obj interface{}) ([]string, error) {
	accessor, err := meta.Accessor(obj)
	if err != nil {
		return nil, err
	}

	labels := accessor.GetLabels()
	cluster, hasCluster := labels[clusterv1.ClusterNameLabelKey]
	// should not contain addon key
	_, hasAddon := labels[addonv1alpha1.AddonLabelKey]
	if !hasCluster || hasAddon {
		return []string{}, nil
	}
	return []string{cluster}, nil
}
// createCSREventFilterFunc returns an event filter that accepts an object only when it is a
// CertificateSigningRequest labelled with clusterName and addOnName whose signer name is
// non-empty and equal to signerName.
func createCSREventFilterFunc(clusterName, addOnName, signerName string) factory.EventFilterFunc {
	return func(obj interface{}) bool {
		accessor, err := meta.Accessor(obj)
		if err != nil {
			return false
		}

		// only enqueue csr from a specific managed cluster and created for a specific addon
		labels := accessor.GetLabels()
		if labels[clusterv1.ClusterNameLabelKey] != clusterName ||
			labels[addonv1alpha1.AddonLabelKey] != addOnName {
			return false
		}

		// only enqueue csr with a specific signer name
		csr, isCSR := obj.(*certificates.CertificateSigningRequest)
		if !isCSR {
			return false
		}
		return len(csr.Spec.SignerName) != 0 && csr.Spec.SignerName == signerName
	}
}