mirror of
https://github.com/fluxcd/flagger.git
synced 2026-02-14 18:10:00 +00:00
Signed-off-by: Sanskar Jaiswal <jaiswalsanskar078@gmail.com> Co-authored-by: Thomas Banks
379 lines
14 KiB
Go
379 lines
14 KiB
Go
/*
|
|
Copyright 2020 The Flux authors
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package controller
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
|
|
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
|
|
|
flaggerv1 "github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1"
|
|
"github.com/fluxcd/flagger/pkg/metrics/observers"
|
|
"github.com/fluxcd/flagger/pkg/metrics/providers"
|
|
serving "knative.dev/serving/pkg/apis/serving/v1"
|
|
)
|
|
|
|
// MetricsProviderServiceSuffix is appended to the metrics provider name when
// the canary target is a Kubernetes Service that is not a Knative service.
const MetricsProviderServiceSuffix = ":service"
|
|
|
|
// to be called during canary initialization
|
|
func (c *Controller) checkMetricProviderAvailability(canary *flaggerv1.Canary) error {
|
|
for _, metric := range canary.GetAnalysis().Metrics {
|
|
if metric.Name == "request-success-rate" || metric.Name == "request-duration" {
|
|
observerFactory := c.observerFactory
|
|
if canary.Spec.MetricsServer != "" {
|
|
var err error
|
|
observerFactory, err = observers.NewFactory(canary.Spec.MetricsServer)
|
|
if err != nil {
|
|
return fmt.Errorf("error building Prometheus client for %s %v", canary.Spec.MetricsServer, err)
|
|
}
|
|
}
|
|
if ok, err := observerFactory.Client.IsOnline(); !ok || err != nil {
|
|
return fmt.Errorf("prometheus not avaiable: %v", err)
|
|
}
|
|
continue
|
|
}
|
|
|
|
if metric.TemplateRef != nil {
|
|
namespace := canary.Namespace
|
|
if metric.TemplateRef.Namespace != canary.Namespace && metric.TemplateRef.Namespace != "" {
|
|
namespace = metric.TemplateRef.Namespace
|
|
}
|
|
|
|
template, err := c.flaggerInformers.MetricInformer.Lister().MetricTemplates(namespace).Get(metric.TemplateRef.Name)
|
|
if err != nil {
|
|
return fmt.Errorf("metric template %s.%s error: %v", metric.TemplateRef.Name, namespace, err)
|
|
}
|
|
|
|
var credentials map[string][]byte
|
|
if template.Spec.Provider.SecretRef != nil {
|
|
secret, err := c.kubeClient.CoreV1().Secrets(namespace).Get(context.TODO(), template.Spec.Provider.SecretRef.Name, metav1.GetOptions{})
|
|
if err != nil {
|
|
return fmt.Errorf("metric template %s.%s secret %s error: %v",
|
|
metric.TemplateRef.Name, namespace, template.Spec.Provider.SecretRef.Name, err)
|
|
}
|
|
credentials = secret.Data
|
|
}
|
|
|
|
factory := providers.Factory{}
|
|
provider, err := factory.Provider(metric.Interval, template.Spec.Provider, credentials, c.kubeConfig)
|
|
if err != nil {
|
|
return fmt.Errorf("metric template %s.%s provider %s error: %v",
|
|
metric.TemplateRef.Name, namespace, template.Spec.Provider.Type, err)
|
|
}
|
|
|
|
if ok, err := provider.IsOnline(); !ok || err != nil {
|
|
return fmt.Errorf("%v in metric template %s.%s not avaiable: %v", template.Spec.Provider.Type,
|
|
template.Name, template.Namespace, err)
|
|
}
|
|
}
|
|
}
|
|
c.recordEventInfof(canary, "all the metrics providers are available!")
|
|
return nil
|
|
}
|
|
|
|
func (c *Controller) runBuiltinMetricChecks(canary *flaggerv1.Canary) bool {
|
|
// override the global provider if one is specified in the canary spec
|
|
var metricsProvider string
|
|
// set the metrics provider to Crossover Prometheus when Crossover is the mesh provider
|
|
// For example, `crossover` metrics provider should be used for `smi:crossover` mesh provider
|
|
if strings.Contains(c.meshProvider, "crossover") {
|
|
metricsProvider = "crossover"
|
|
} else {
|
|
metricsProvider = c.meshProvider
|
|
}
|
|
|
|
if canary.Spec.Provider != "" {
|
|
metricsProvider = canary.Spec.Provider
|
|
|
|
// set the metrics provider to Linkerd Prometheus when Linkerd is the default mesh provider
|
|
if strings.Contains(c.meshProvider, "linkerd") {
|
|
metricsProvider = "linkerd"
|
|
}
|
|
}
|
|
// set the metrics provider to query Prometheus for the canary Kubernetes service if the canary target is Service
|
|
if canary.Spec.TargetRef.Kind == "Service" && !canary.Spec.TargetRef.IsKnativeService() {
|
|
metricsProvider = metricsProvider + MetricsProviderServiceSuffix
|
|
}
|
|
|
|
var knativeService *serving.Service
|
|
if canary.Spec.Provider == flaggerv1.KnativeProvider || c.meshProvider == flaggerv1.KnativeProvider {
|
|
var err error
|
|
knativeService, err = c.knativeClient.ServingV1().Services(canary.Namespace).Get(context.TODO(), canary.Spec.TargetRef.Name, metav1.GetOptions{})
|
|
if err != nil {
|
|
c.recordEventErrorf(canary, "Error fetching Knative service %s/%s %v", canary.Namespace, canary.Spec.TargetRef.Name, err)
|
|
return false
|
|
}
|
|
}
|
|
|
|
// create observer based on the mesh provider
|
|
observerFactory := c.observerFactory
|
|
|
|
// override the global metrics server if one is specified in the canary spec
|
|
if canary.Spec.MetricsServer != "" {
|
|
var err error
|
|
observerFactory, err = observers.NewFactory(canary.Spec.MetricsServer)
|
|
if err != nil {
|
|
c.recordEventErrorf(canary, "Error building Prometheus client for %s %v", canary.Spec.MetricsServer, err)
|
|
return false
|
|
}
|
|
}
|
|
observer := observerFactory.Observer(metricsProvider)
|
|
|
|
// run metrics checks
|
|
for _, metric := range canary.GetAnalysis().Metrics {
|
|
if metric.Interval == "" {
|
|
metric.Interval = canary.GetMetricInterval()
|
|
}
|
|
|
|
if metric.Name == "request-success-rate" {
|
|
model := toMetricModel(canary, metric.Interval, metric.TemplateVariables)
|
|
if knativeService != nil {
|
|
model.Route = knativeService.Status.LatestCreatedRevisionName
|
|
}
|
|
val, err := observer.GetRequestSuccessRate(model)
|
|
if err != nil {
|
|
if errors.Is(err, providers.ErrNoValuesFound) {
|
|
c.recordEventWarningf(canary,
|
|
"Halt advancement no values found for %s metric %s probably %s.%s is not receiving traffic: %v",
|
|
metricsProvider, metric.Name, canary.Spec.TargetRef.Name, canary.Namespace, err)
|
|
} else {
|
|
c.recordEventErrorf(canary, "Prometheus query failed: %v", err)
|
|
}
|
|
return false
|
|
}
|
|
c.recorder.SetAnalysis(canary, metric.Name, val)
|
|
if metric.ThresholdRange != nil {
|
|
tr := *metric.ThresholdRange
|
|
if tr.Min != nil && val < *tr.Min {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement success rate %.2f%% < %v%%",
|
|
canary.Name, canary.Namespace, val, *tr.Min)
|
|
return false
|
|
}
|
|
if tr.Max != nil && val > *tr.Max {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement success rate %.2f%% > %v%%",
|
|
canary.Name, canary.Namespace, val, *tr.Max)
|
|
return false
|
|
}
|
|
} else if metric.Threshold > val {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement success rate %.2f%% < %v%%",
|
|
canary.Name, canary.Namespace, val, metric.Threshold)
|
|
return false
|
|
}
|
|
}
|
|
|
|
if metric.Name == "request-duration" {
|
|
model := toMetricModel(canary, metric.Interval, metric.TemplateVariables)
|
|
if knativeService != nil {
|
|
model.Route = knativeService.Status.LatestCreatedRevisionName
|
|
}
|
|
val, err := observer.GetRequestDuration(model)
|
|
if err != nil {
|
|
if errors.Is(err, providers.ErrNoValuesFound) {
|
|
c.recordEventWarningf(canary, "Halt advancement no values found for %s metric %s probably %s.%s is not receiving traffic",
|
|
metricsProvider, metric.Name, canary.Spec.TargetRef.Name, canary.Namespace)
|
|
} else {
|
|
c.recordEventErrorf(canary, "Prometheus query failed: %v", err)
|
|
}
|
|
return false
|
|
}
|
|
c.recorder.SetAnalysis(canary, metric.Name, val.Seconds())
|
|
if metric.ThresholdRange != nil {
|
|
tr := *metric.ThresholdRange
|
|
if tr.Min != nil && val < time.Duration(*tr.Min)*time.Millisecond {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement request duration %v < %v",
|
|
canary.Name, canary.Namespace, val, time.Duration(*tr.Min)*time.Millisecond)
|
|
return false
|
|
}
|
|
if tr.Max != nil && val > time.Duration(*tr.Max)*time.Millisecond {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement request duration %v > %v",
|
|
canary.Name, canary.Namespace, val, time.Duration(*tr.Max)*time.Millisecond)
|
|
return false
|
|
}
|
|
} else if val > time.Duration(metric.Threshold)*time.Millisecond {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement request duration %v > %v",
|
|
canary.Name, canary.Namespace, val, time.Duration(metric.Threshold)*time.Millisecond)
|
|
return false
|
|
}
|
|
}
|
|
|
|
// in-line PromQL
|
|
if metric.Query != "" {
|
|
model := toMetricModel(canary, metric.Interval, metric.TemplateVariables)
|
|
if knativeService != nil {
|
|
model.Route = knativeService.Status.LatestCreatedRevisionName
|
|
}
|
|
query, err := observers.RenderQuery(metric.Query, model)
|
|
val, err := observerFactory.Client.RunQuery(query)
|
|
if err != nil {
|
|
if errors.Is(err, providers.ErrNoValuesFound) {
|
|
c.recordEventWarningf(canary, "Halt advancement no values found for metric: %s",
|
|
metric.Name)
|
|
} else {
|
|
c.recordEventErrorf(canary, "Prometheus query failed for %s: %v", metric.Name, err)
|
|
}
|
|
return false
|
|
}
|
|
c.recorder.SetAnalysis(canary, metric.Name, val)
|
|
if metric.ThresholdRange != nil {
|
|
tr := *metric.ThresholdRange
|
|
if tr.Min != nil && val < *tr.Min {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f < %v",
|
|
canary.Name, canary.Namespace, metric.Name, val, *tr.Min)
|
|
return false
|
|
}
|
|
if tr.Max != nil && val > *tr.Max {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f > %v",
|
|
canary.Name, canary.Namespace, metric.Name, val, *tr.Max)
|
|
return false
|
|
}
|
|
} else if val > metric.Threshold {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f > %v",
|
|
canary.Name, canary.Namespace, metric.Name, val, metric.Threshold)
|
|
return false
|
|
}
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func (c *Controller) runMetricChecks(canary *flaggerv1.Canary) bool {
|
|
var knativeService *serving.Service
|
|
if canary.Spec.Provider == flaggerv1.KnativeProvider || c.meshProvider == flaggerv1.KnativeProvider {
|
|
var err error
|
|
knativeService, err = c.knativeClient.ServingV1().Services(canary.Namespace).Get(context.TODO(), canary.Spec.TargetRef.Name, metav1.GetOptions{})
|
|
if err != nil {
|
|
c.recordEventErrorf(canary, "Error fetching Knative service %s/%s %v", canary.Namespace, canary.Spec.TargetRef.Name, err)
|
|
return false
|
|
}
|
|
}
|
|
|
|
for _, metric := range canary.GetAnalysis().Metrics {
|
|
if metric.TemplateRef != nil {
|
|
namespace := canary.Namespace
|
|
if metric.TemplateRef.Namespace != canary.Namespace && metric.TemplateRef.Namespace != "" {
|
|
namespace = metric.TemplateRef.Namespace
|
|
}
|
|
|
|
template, err := c.flaggerInformers.MetricInformer.Lister().MetricTemplates(namespace).Get(metric.TemplateRef.Name)
|
|
if err != nil {
|
|
c.recordEventErrorf(canary, "Metric template %s.%s error: %v", metric.TemplateRef.Name, namespace, err)
|
|
return false
|
|
}
|
|
|
|
var credentials map[string][]byte
|
|
if template.Spec.Provider.SecretRef != nil {
|
|
secret, err := c.kubeClient.CoreV1().Secrets(namespace).Get(context.TODO(), template.Spec.Provider.SecretRef.Name, metav1.GetOptions{})
|
|
if err != nil {
|
|
c.recordEventErrorf(canary, "Metric template %s.%s secret %s error: %v",
|
|
metric.TemplateRef.Name, namespace, template.Spec.Provider.SecretRef.Name, err)
|
|
return false
|
|
}
|
|
credentials = secret.Data
|
|
}
|
|
|
|
factory := providers.Factory{}
|
|
provider, err := factory.Provider(metric.Interval, template.Spec.Provider, credentials, c.kubeConfig)
|
|
if err != nil {
|
|
c.recordEventErrorf(canary, "Metric template %s.%s provider %s error: %v",
|
|
metric.TemplateRef.Name, namespace, template.Spec.Provider.Type, err)
|
|
return false
|
|
}
|
|
|
|
model := toMetricModel(canary, metric.Interval, metric.TemplateVariables)
|
|
if knativeService != nil {
|
|
model.Route = knativeService.Status.LatestCreatedRevisionName
|
|
}
|
|
query, err := observers.RenderQuery(template.Spec.Query, model)
|
|
c.logger.With("canary", fmt.Sprintf("%s.%s", canary.Name, namespace)).
|
|
Debugf("Metric template %s.%s query: %s", metric.TemplateRef.Name, namespace, query)
|
|
if err != nil {
|
|
c.recordEventErrorf(canary, "Metric template %s.%s query render error: %v",
|
|
metric.TemplateRef.Name, namespace, err)
|
|
return false
|
|
}
|
|
|
|
val, err := provider.RunQuery(query)
|
|
if err != nil {
|
|
if errors.Is(err, providers.ErrNoValuesFound) {
|
|
c.recordEventWarningf(canary, "Halt advancement no values found for custom metric: %s: %v",
|
|
metric.Name, err)
|
|
} else {
|
|
c.recordEventErrorf(canary, "Metric query failed for %s: %v", metric.Name, err)
|
|
}
|
|
return false
|
|
}
|
|
|
|
c.recorder.SetAnalysis(canary, metric.Name, val)
|
|
|
|
if metric.ThresholdRange != nil {
|
|
tr := *metric.ThresholdRange
|
|
if tr.Min != nil && val < *tr.Min {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f < %v",
|
|
canary.Name, canary.Namespace, metric.Name, val, *tr.Min)
|
|
return false
|
|
}
|
|
if tr.Max != nil && val > *tr.Max {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f > %v",
|
|
canary.Name, canary.Namespace, metric.Name, val, *tr.Max)
|
|
return false
|
|
}
|
|
} else if val > metric.Threshold {
|
|
c.recordEventWarningf(canary, "Halt %s.%s advancement %s %.2f > %v",
|
|
canary.Name, canary.Namespace, metric.Name, val, metric.Threshold)
|
|
return false
|
|
}
|
|
} else if metric.Name != "request-success-rate" && metric.Name != "request-duration" && metric.Query == "" {
|
|
c.recordEventErrorf(canary, "Metric query failed for no usable metrics template and query were configured")
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func toMetricModel(r *flaggerv1.Canary, interval string, variables map[string]string) flaggerv1.MetricTemplateModel {
|
|
service := r.Spec.TargetRef.Name
|
|
if r.Spec.Service.Name != "" {
|
|
service = r.Spec.Service.Name
|
|
}
|
|
ingress := r.Spec.TargetRef.Name
|
|
if r.Spec.IngressRef != nil {
|
|
ingress = r.Spec.IngressRef.Name
|
|
}
|
|
route := r.Spec.TargetRef.Name
|
|
if r.Spec.RouteRef != nil {
|
|
route = r.Spec.RouteRef.Name
|
|
}
|
|
return flaggerv1.MetricTemplateModel{
|
|
Name: r.Name,
|
|
Namespace: r.Namespace,
|
|
Target: r.Spec.TargetRef.Name,
|
|
Service: service,
|
|
Ingress: ingress,
|
|
Route: route,
|
|
Interval: interval,
|
|
Variables: variables,
|
|
}
|
|
}
|