Files
kubevela/cmd/core/app/server.go
Ai Ranthem a5606b7808
Some checks failed
CodeQL / Analyze (go) (push) Failing after 6m23s
Definition-Lint / definition-doc (push) Failing after 3m8s
E2E MultiCluster Test / detect-noop (push) Successful in 6s
E2E Test / detect-noop (push) Successful in 3s
Go / detect-noop (push) Successful in 2s
license / Check for unapproved licenses (push) Failing after 18s
Registry / publish-core-images (push) Failing after 1m4s
Unit-Test / detect-noop (push) Successful in 20s
Sync SDK / sync_sdk (push) Failing after 3m9s
Go / staticcheck (push) Successful in 6m17s
Go / check-diff (push) Failing after 19m4s
Go / check-core-image-build (push) Failing after 5m44s
Go / check-cli-image-build (push) Failing after 3m31s
Unit-Test / unit-tests (push) Failing after 13m54s
Go / lint (push) Failing after 1h53m27s
Scorecards supply-chain security / Scorecards analysis (push) Failing after 1m27s
E2E MultiCluster Test / e2e-multi-cluster-tests (v1.29) (push) Has been cancelled
E2E Test / e2e-tests (v1.29) (push) Has been cancelled
Go / check-windows (push) Has been cancelled
Chore: (deps): Update k8s to 1.29 (#6654)
* chore: update k8s to 1.29

Signed-off-by: phantomnat <w.nattadej@gmail.com>

* fix: unit test

Signed-off-by: phantomnat <w.nattadej@gmail.com>

* fix: lint

Signed-off-by: phantomnat <w.nattadej@gmail.com>

* fix: lint

Signed-off-by: phantomnat <w.nattadej@gmail.com>

* fix: e2e

Signed-off-by: phantomnat <w.nattadej@gmail.com>

* fix: lint and e2e test

Signed-off-by: phantomnat <w.nattadej@gmail.com>

* test(e2e): increase timeout

Signed-off-by: phantomnat <w.nattadej@gmail.com>

* fix e2e and scripts

Signed-off-by: AiRanthem <zhongtianyun.zty@alibaba-inc.com>

* make reviewable

Signed-off-by: AiRanthem <zhongtianyun.zty@alibaba-inc.com>

* rollback a unnecessary ut change

Signed-off-by: AiRanthem <zhongtianyun.zty@alibaba-inc.com>

* update go.mod to import merged workflow

Signed-off-by: AiRanthem <zhongtianyun.zty@alibaba-inc.com>

---------

Signed-off-by: phantomnat <w.nattadej@gmail.com>
Signed-off-by: AiRanthem <zhongtianyun.zty@alibaba-inc.com>
Co-authored-by: phantomnat <w.nattadej@gmail.com>
2025-01-03 07:54:42 +08:00

316 lines
10 KiB
Go

/*
Copyright 2022 The KubeVela Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package app
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"time"
velaclient "github.com/kubevela/pkg/controller/client"
"github.com/kubevela/pkg/controller/sharding"
"github.com/kubevela/pkg/meta"
"github.com/kubevela/pkg/util/profiling"
"github.com/pkg/errors"
"github.com/spf13/cobra"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/klog/v2"
"k8s.io/klog/v2/textlogger"
ctrl "sigs.k8s.io/controller-runtime"
ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache"
ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/controller-runtime/pkg/manager/signals"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
ctrlwebhook "sigs.k8s.io/controller-runtime/pkg/webhook"
"github.com/oam-dev/kubevela/apis/core.oam.dev/v1beta1"
"github.com/oam-dev/kubevela/apis/types"
"github.com/oam-dev/kubevela/cmd/core/app/hooks"
"github.com/oam-dev/kubevela/cmd/core/app/options"
"github.com/oam-dev/kubevela/pkg/auth"
"github.com/oam-dev/kubevela/pkg/cache"
commonconfig "github.com/oam-dev/kubevela/pkg/controller/common"
oamv1beta1 "github.com/oam-dev/kubevela/pkg/controller/core.oam.dev/v1beta1"
"github.com/oam-dev/kubevela/pkg/controller/core.oam.dev/v1beta1/application"
"github.com/oam-dev/kubevela/pkg/features"
"github.com/oam-dev/kubevela/pkg/monitor/watcher"
"github.com/oam-dev/kubevela/pkg/multicluster"
"github.com/oam-dev/kubevela/pkg/oam"
"github.com/oam-dev/kubevela/pkg/utils/common"
"github.com/oam-dev/kubevela/pkg/utils/util"
oamwebhook "github.com/oam-dev/kubevela/pkg/webhook/core.oam.dev"
"github.com/oam-dev/kubevela/version"
)
var (
scheme = common.Scheme
waitSecretTimeout = 90 * time.Second
waitSecretInterval = 2 * time.Second
)
// NewCoreCommand creates a *cobra.Command object with default parameters
func NewCoreCommand() *cobra.Command {
s := options.NewCoreOptions()
cmd := &cobra.Command{
Use: "vela-core",
Long: `The KubeVela controller manager is a daemon that embeds the core control loops shipped with KubeVela`,
RunE: func(cmd *cobra.Command, args []string) error {
return run(signals.SetupSignalHandler(), s)
},
SilenceUsage: true,
FParseErrWhitelist: cobra.FParseErrWhitelist{
// Allow unknown flags for backward-compatibility.
UnknownFlags: true,
},
}
fs := cmd.Flags()
namedFlagSets := s.Flags()
for _, set := range namedFlagSets.FlagSets {
fs.AddFlagSet(set)
}
meta.Name = types.VelaCoreName
klog.InfoS("KubeVela information", "version", version.VelaVersion, "revision", version.GitRevision)
klog.InfoS("Vela-Core init", "definition namespace", oam.SystemDefinitionNamespace)
return cmd
}
func run(ctx context.Context, s *options.CoreOptions) error {
restConfig := ctrl.GetConfigOrDie()
restConfig.UserAgent = types.KubeVelaName + "/" + version.GitRevision
restConfig.QPS = float32(s.QPS)
restConfig.Burst = s.Burst
restConfig.Wrap(auth.NewImpersonatingRoundTripper)
klog.InfoS("Kubernetes Config Loaded",
"UserAgent", restConfig.UserAgent,
"QPS", restConfig.QPS,
"Burst", restConfig.Burst,
)
go profiling.StartProfilingServer(nil)
// wrapper the round tripper by multi cluster rewriter
if s.EnableClusterGateway {
client, err := multicluster.Initialize(restConfig, true)
if err != nil {
klog.ErrorS(err, "failed to enable multi-cluster capability")
return err
}
if s.EnableClusterMetrics {
_, err := multicluster.NewClusterMetricsMgr(context.Background(), client, s.ClusterMetricsInterval)
if err != nil {
klog.ErrorS(err, "failed to enable multi-cluster-metrics capability")
return err
}
}
}
ctrl.SetLogger(textlogger.NewLogger(textlogger.NewConfig()))
if utilfeature.DefaultMutableFeatureGate.Enabled(features.ApplyOnce) {
commonconfig.ApplicationReSyncPeriod = s.InformerSyncPeriod
}
leaderElectionID := util.GenerateLeaderElectionID(types.KubeVelaName, s.ControllerArgs.IgnoreAppWithoutControllerRequirement)
leaderElectionID += sharding.GetShardIDSuffix()
mgr, err := ctrl.NewManager(restConfig, ctrl.Options{
Scheme: scheme,
Metrics: metricsserver.Options{
BindAddress: s.MetricsAddr,
},
LeaderElection: s.EnableLeaderElection,
LeaderElectionNamespace: s.LeaderElectionNamespace,
LeaderElectionID: leaderElectionID,
WebhookServer: ctrlwebhook.NewServer(ctrlwebhook.Options{
Port: s.WebhookPort,
CertDir: s.CertDir,
}),
HealthProbeBindAddress: s.HealthAddr,
LeaseDuration: &s.LeaseDuration,
RenewDeadline: &s.RenewDeadLine,
RetryPeriod: &s.RetryPeriod,
NewClient: velaclient.DefaultNewControllerClient,
NewCache: cache.BuildCache(ctx,
ctrlcache.Options{
Scheme: scheme,
SyncPeriod: &s.InformerSyncPeriod,
// SyncPeriod is configured with default value, aka. 10h. First, controller-runtime does not
// recommend use it as a time trigger, instead, it is expected to work for failure tolerance
// of controller-runtime. Additionally, set this value will affect not only application
// controller but also all other controllers like definition controller. Therefore, for
// functionalities like state-keep, they should be invented in other ways.
},
&v1beta1.Application{}, &v1beta1.ApplicationRevision{}, &v1beta1.ResourceTracker{},
),
Client: ctrlclient.Options{
Cache: &ctrlclient.CacheOptions{
DisableFor: cache.NewResourcesToDisableCache(),
},
},
})
if err != nil {
klog.ErrorS(err, "Unable to create a controller manager")
return err
}
if err := registerHealthChecks(mgr); err != nil {
klog.ErrorS(err, "Unable to register ready/health checks")
return err
}
if !sharding.EnableSharding {
if err = prepareRun(ctx, mgr, s); err != nil {
return err
}
} else {
if err = prepareRunInShardingMode(ctx, mgr, s); err != nil {
return err
}
}
klog.Info("Start the vela application monitor")
informer, err := mgr.GetCache().GetInformer(ctx, &v1beta1.Application{})
if err != nil {
klog.ErrorS(err, "Unable to get informer for application")
}
watcher.StartApplicationMetricsWatcher(informer)
if err := mgr.Start(ctx); err != nil {
klog.ErrorS(err, "Failed to run manager")
return err
}
if s.LogFilePath != "" {
klog.Flush()
}
klog.Info("Safely stops Program...")
return nil
}
func prepareRunInShardingMode(ctx context.Context, mgr manager.Manager, s *options.CoreOptions) error {
if sharding.IsMaster() {
klog.Infof("controller running in sharding mode, current shard is master")
if !utilfeature.DefaultMutableFeatureGate.Enabled(features.DisableWebhookAutoSchedule) {
go sharding.DefaultScheduler.Get().Start(ctx)
}
if err := prepareRun(ctx, mgr, s); err != nil {
return err
}
} else {
klog.Infof("controller running in sharding mode, current shard id: %s", sharding.ShardID)
if err := application.Setup(mgr, *s.ControllerArgs); err != nil {
return err
}
}
return nil
}
func prepareRun(ctx context.Context, mgr manager.Manager, s *options.CoreOptions) error {
if s.UseWebhook {
klog.InfoS("Enable webhook", "server port", strconv.Itoa(s.WebhookPort))
oamwebhook.Register(mgr, *s.ControllerArgs)
if err := waitWebhookSecretVolume(s.CertDir, waitSecretTimeout, waitSecretInterval); err != nil {
klog.ErrorS(err, "Unable to get webhook secret")
return err
}
}
if err := oamv1beta1.Setup(mgr, *s.ControllerArgs); err != nil {
klog.ErrorS(err, "Unable to setup the oam controller")
return err
}
if err := multicluster.InitClusterInfo(mgr.GetConfig()); err != nil {
klog.ErrorS(err, "Init control plane cluster info")
return err
}
klog.Info("Start the vela controller manager")
for _, hook := range []hooks.PreStartHook{hooks.NewSystemCRDValidationHook()} {
if err := hook.Run(ctx); err != nil {
return fmt.Errorf("failed to run hook %T: %w", hook, err)
}
}
return nil
}
// registerHealthChecks is used to create readiness&liveness probes
func registerHealthChecks(mgr ctrl.Manager) error {
klog.Info("Create readiness/health check")
if err := mgr.AddReadyzCheck("ping", healthz.Ping); err != nil {
return err
}
// TODO: change the health check to be different from readiness check
return mgr.AddHealthzCheck("ping", healthz.Ping)
}
// waitWebhookSecretVolume waits for webhook secret ready to avoid mgr running crash
func waitWebhookSecretVolume(certDir string, timeout, interval time.Duration) error {
start := time.Now()
for {
time.Sleep(interval)
if time.Since(start) > timeout {
return fmt.Errorf("getting webhook secret timeout after %s", timeout.String())
}
klog.InfoS("Wait webhook secret", "time consumed(second)", int64(time.Since(start).Seconds()),
"timeout(second)", int64(timeout.Seconds()))
if _, err := os.Stat(certDir); !os.IsNotExist(err) {
ready := func() bool {
f, err := os.Open(filepath.Clean(certDir))
if err != nil {
return false
}
defer func() {
if err := f.Close(); err != nil {
klog.Error(err, "Failed to close file")
}
}()
// check if dir is empty
if _, err := f.Readdir(1); errors.Is(err, io.EOF) {
return false
}
// check if secret files are empty
err = filepath.Walk(certDir, func(path string, info os.FileInfo, err error) error {
// even Cert dir is created, cert files are still empty for a while
if info.Size() == 0 {
return errors.New("secret is not ready")
}
return nil
})
if err == nil {
klog.InfoS("Webhook secret is ready", "time consumed(second)",
int64(time.Since(start).Seconds()))
return true
}
return false
}()
if ready {
return nil
}
}
}
}