From ffbc25efdad1529c22991cbc0bfd10e4695d28fb Mon Sep 17 00:00:00 2001 From: cappyzawa Date: Wed, 9 Jul 2025 00:02:08 +0900 Subject: [PATCH] Add count metrics for canary successes and failures Implement flagger_canary_successes_total and flagger_canary_failures_total counter metrics with deployment strategy detection and analysis status tracking for better observability of canary deployment outcomes. Signed-off-by: cappyzawa --- docs/gitbook/usage/monitoring.md | 6 ++ pkg/apis/flagger/v1beta1/canary.go | 28 ++++++ pkg/apis/flagger/v1beta1/canary_test.go | 94 +++++++++++++++++ pkg/controller/events.go | 37 ++++--- pkg/controller/scheduler.go | 37 ++----- pkg/controller/scheduler_metrics_test.go | 122 +++++++++-------------- pkg/metrics/recorder.go | 4 +- pkg/metrics/recorder_test.go | 23 +++++ 8 files changed, 227 insertions(+), 124 deletions(-) create mode 100644 pkg/apis/flagger/v1beta1/canary_test.go diff --git a/docs/gitbook/usage/monitoring.md b/docs/gitbook/usage/monitoring.md index 67179480..d1379991 100644 --- a/docs/gitbook/usage/monitoring.md +++ b/docs/gitbook/usage/monitoring.md @@ -121,4 +121,10 @@ flagger_canary_duration_seconds_count{name="podinfo",namespace="test"} 6 # Last canary metric analysis result per different metrics flagger_canary_metric_analysis{metric="podinfo-http-successful-rate",name="podinfo",namespace="test"} 1 flagger_canary_metric_analysis{metric="podinfo-custom-metric",name="podinfo",namespace="test"} 0.918223108974359 + +# Canary successes total counter +flagger_canary_successes_total{name="podinfo",namespace="test",deployment_strategy="canary",analysis_status="completed"} 5 + +# Canary failures total counter +flagger_canary_failures_total{name="podinfo",namespace="test",deployment_strategy="canary",analysis_status="completed"} 1 ``` diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go index 1274f7a1..aae9ccd8 100644 --- a/pkg/apis/flagger/v1beta1/canary.go +++ b/pkg/apis/flagger/v1beta1/canary.go @@ -35,6 +35,13 @@ const ( MetricInterval = "1m" ) +// Deployment strategies +const ( + DeploymentStrategyCanary = "canary" + DeploymentStrategyBlueGreen = "blue-green" + DeploymentStrategyABTesting = "ab-testing" +) + // +genclient // +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object @@ -640,3 +647,24 @@ func (c *Canary) SkipAnalysis() bool { } return c.Spec.SkipAnalysis } + +// DeploymentStrategy returns the deployment strategy based on canary analysis configuration +func (c *Canary) DeploymentStrategy() string { + analysis := c.GetAnalysis() + if analysis == nil { + return DeploymentStrategyCanary + } + + // A/B Testing: has match conditions and iterations + if len(analysis.Match) > 0 && analysis.Iterations > 0 { + return DeploymentStrategyABTesting + } + + // Blue/Green: has iterations but no match conditions + if analysis.Iterations > 0 { + return DeploymentStrategyBlueGreen + } + + // Canary Release: default (has maxWeight, stepWeight, or stepWeights) + return DeploymentStrategyCanary +} diff --git a/pkg/apis/flagger/v1beta1/canary_test.go b/pkg/apis/flagger/v1beta1/canary_test.go new file mode 100644 index 00000000..113d614c --- /dev/null +++ b/pkg/apis/flagger/v1beta1/canary_test.go @@ -0,0 +1,94 @@ +/* +Copyright 2025 The Flux authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + "testing" + + istiov1alpha1 "github.com/fluxcd/flagger/pkg/apis/istio/common/v1alpha1" + istiov1beta1 "github.com/fluxcd/flagger/pkg/apis/istio/v1beta1" + "github.com/stretchr/testify/assert" +) + +func TestCanary_GetDeploymentStrategy(t *testing.T) { + tests := []struct { + name string + analysis *CanaryAnalysis + expected string + }{ + { + name: "canary strategy with maxWeight", + analysis: &CanaryAnalysis{ + MaxWeight: 30, + StepWeight: 10, + }, + expected: DeploymentStrategyCanary, + }, + { + name: "canary strategy with stepWeights", + analysis: &CanaryAnalysis{ + StepWeights: []int{10, 20, 30}, + }, + expected: DeploymentStrategyCanary, + }, + { + name: "blue-green strategy with iterations", + analysis: &CanaryAnalysis{ + Iterations: 5, + }, + expected: DeploymentStrategyBlueGreen, + }, + { + name: "ab-testing strategy with iterations and match", + analysis: &CanaryAnalysis{ + Iterations: 10, + Match: []istiov1beta1.HTTPMatchRequest{ + { + Headers: map[string]istiov1alpha1.StringMatch{ + "x-canary": { + Exact: "insider", + }, + }, + }, + }, + }, + expected: DeploymentStrategyABTesting, + }, + { + name: "default to canary when analysis is nil", + analysis: nil, + expected: DeploymentStrategyCanary, + }, + { + name: "default to canary when analysis is empty", + analysis: &CanaryAnalysis{}, + expected: DeploymentStrategyCanary, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + canary := &Canary{ + Spec: CanarySpec{ + Analysis: tt.analysis, + }, + } + result := canary.DeploymentStrategy() + assert.Equal(t, tt.expected, result) + }) + } +} diff --git a/pkg/controller/events.go b/pkg/controller/events.go index 17ac4fdd..591b420b 100644 --- a/pkg/controller/events.go +++ b/pkg/controller/events.go @@ -209,30 +209,35 @@ func alertMetadata(canary *flaggerv1.Canary) []notifier.Field { }, ) - if canary.GetAnalysis().StepWeight > 0 { - fields = append(fields, notifier.Field{ - Name: "Traffic routing", - Value: fmt.Sprintf("Weight step: %v max: %v", - canary.GetAnalysis().StepWeight, - canary.GetAnalysis().MaxWeight), - }) - } else if len(canary.GetAnalysis().StepWeights) > 0 { - fields = append(fields, notifier.Field{ - Name: "Traffic routing", - Value: fmt.Sprintf("Weight steps: %s max: %v", - strings.Trim(strings.Join(strings.Fields(fmt.Sprint(canary.GetAnalysis().StepWeights)), ","), "[]"), - canary.GetAnalysis().MaxWeight), - }) - } else if len(canary.GetAnalysis().Match) > 0 { + strategy := canary.DeploymentStrategy() + switch strategy { + case flaggerv1.DeploymentStrategyABTesting: fields = append(fields, notifier.Field{ Name: "Traffic routing", Value: "A/B Testing", }) - } else if canary.GetAnalysis().Iterations > 0 { + case flaggerv1.DeploymentStrategyBlueGreen: fields = append(fields, notifier.Field{ Name: "Traffic routing", Value: "Blue/Green", }) + default: + // Canary strategy + if canary.GetAnalysis().StepWeight > 0 { + fields = append(fields, notifier.Field{ + Name: "Traffic routing", + Value: fmt.Sprintf("Weight step: %v max: %v", + canary.GetAnalysis().StepWeight, + canary.GetAnalysis().MaxWeight), + }) + } else if len(canary.GetAnalysis().StepWeights) > 0 { + fields = append(fields, notifier.Field{ + Name: "Traffic routing", + Value: fmt.Sprintf("Weight steps: %s max: %v", + strings.Trim(strings.Join(strings.Fields(fmt.Sprint(canary.GetAnalysis().StepWeights)), ","), "[]"), + canary.GetAnalysis().MaxWeight), + }) + } } return fields } diff --git a/pkg/controller/scheduler.go b/pkg/controller/scheduler.go index 5d06c642..957e3508 100644 --- a/pkg/controller/scheduler.go +++ b/pkg/controller/scheduler.go @@ -38,27 +38,6 @@ func (c *Controller) min(a int, b int) int { return b } -// getDeploymentStrategy determines the deployment strategy based on canary analysis configuration -func (c *Controller) getDeploymentStrategy(canary *flaggerv1.Canary) string { - analysis := canary.GetAnalysis() - if analysis == nil { - return metrics.CanaryStrategy - } - - // A/B Testing: has match conditions and iterations - if len(analysis.Match) > 0 && analysis.Iterations > 0 { - return metrics.ABTestingStrategy - } - - // Blue/Green: has iterations but no match conditions - if analysis.Iterations > 0 { - return metrics.BlueGreenStrategy - } - - // Canary Release: default (has maxWeight, stepWeight, or stepWeights) - return metrics.CanaryStrategy -} - func (c *Controller) maxWeight(canary *flaggerv1.Canary) int { var stepWeightsLen = len(canary.GetAnalysis().StepWeights) if stepWeightsLen > 0 { @@ -425,7 +404,7 @@ func (c *Controller) advanceCanary(name string, namespace string) { c.recorder.IncSuccesses(metrics.CanaryMetricLabels{ Name: cd.Spec.TargetRef.Name, Namespace: cd.Namespace, - DeploymentStrategy: c.getDeploymentStrategy(cd), + DeploymentStrategy: cd.DeploymentStrategy(), AnalysisStatus: metrics.AnalysisStatusCompleted, }) c.runPostRolloutHooks(cd, flaggerv1.CanaryPhaseSucceeded) @@ -488,14 +467,12 @@ func (c *Controller) advanceCanary(name string, namespace string) { } } - // strategy: A/B testing - if len(cd.GetAnalysis().Match) > 0 && cd.GetAnalysis().Iterations > 0 { + strategy := cd.DeploymentStrategy() + switch strategy { + case flaggerv1.DeploymentStrategyABTesting: c.runAB(cd, canaryController, meshRouter) return - } - - // strategy: Blue/Green - if cd.GetAnalysis().Iterations > 0 { + case flaggerv1.DeploymentStrategyBlueGreen: c.runBlueGreen(cd, canaryController, meshRouter, provider, mirrored) return } @@ -845,7 +822,7 @@ func (c *Controller) shouldSkipAnalysis(canary *flaggerv1.Canary, canaryControll c.recorder.IncSuccesses(metrics.CanaryMetricLabels{ Name: canary.Spec.TargetRef.Name, Namespace: canary.Namespace, - DeploymentStrategy: c.getDeploymentStrategy(canary), + DeploymentStrategy: canary.DeploymentStrategy(), AnalysisStatus: metrics.AnalysisStatusSkipped, }) c.recordEventInfof(canary, "Promotion completed! Canary analysis was skipped for %s.%s", @@ -998,7 +975,7 @@ func (c *Controller) rollback(canary *flaggerv1.Canary, canaryController canary. c.recorder.IncFailures(metrics.CanaryMetricLabels{ Name: canary.Spec.TargetRef.Name, Namespace: canary.Namespace, - DeploymentStrategy: c.getDeploymentStrategy(canary), + DeploymentStrategy: canary.DeploymentStrategy(), AnalysisStatus: metrics.AnalysisStatusCompleted, }) c.runPostRolloutHooks(canary, flaggerv1.CanaryPhaseFailed) diff --git a/pkg/controller/scheduler_metrics_test.go b/pkg/controller/scheduler_metrics_test.go index 03606a81..1f7cd67d 100644 --- a/pkg/controller/scheduler_metrics_test.go +++ b/pkg/controller/scheduler_metrics_test.go @@ -28,9 +28,6 @@ import ( "k8s.io/client-go/tools/record" flaggerv1 "github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1" - istiov1alpha1 "github.com/fluxcd/flagger/pkg/apis/istio/common/v1alpha1" - istiov1beta1 "github.com/fluxcd/flagger/pkg/apis/istio/v1beta1" - "github.com/fluxcd/flagger/pkg/metrics" "github.com/fluxcd/flagger/pkg/metrics/observers" ) @@ -190,7 +187,7 @@ func TestController_runMetricChecks(t *testing.T) { } func TestController_MetricsStateTransition(t *testing.T) { - t.Run("initialization and progression metrics", func(t *testing.T) { + t.Run("successful canary promotion with count metrics", func(t *testing.T) { mocks := newDeploymentFixture(nil) mocks.ctrl.advanceCanary("podinfo", "default") @@ -224,9 +221,22 @@ func TestController_MetricsStateTransition(t *testing.T) { totalWeight := actualPrimaryWeight + actualCanaryWeight assert.InDelta(t, 100.0, totalWeight, 1.0) + + const maxAdvanceAttempts = 10 // sufficient attempts to reach canary completion + for range maxAdvanceAttempts { + mocks.ctrl.advanceCanary("podinfo", "default") + c, err := mocks.flaggerClient.FlaggerV1beta1().Canaries("default").Get(context.TODO(), "podinfo", metav1.GetOptions{}) + require.NoError(t, err) + if c.Status.Phase == flaggerv1.CanaryPhaseSucceeded { + break + } + } + + successCount := testutil.ToFloat64(mocks.ctrl.recorder.GetSuccessesMetric().WithLabelValues("podinfo", "default", "canary", "completed")) + assert.Equal(t, float64(1), successCount) }) - t.Run("failed canary rollback", func(t *testing.T) { + t.Run("failed canary rollback with count metrics", func(t *testing.T) { mocks := newDeploymentFixture(nil) mocks.ctrl.advanceCanary("podinfo", "default") @@ -264,6 +274,37 @@ func TestController_MetricsStateTransition(t *testing.T) { actualCanaryWeight := testutil.ToFloat64(mocks.ctrl.recorder.GetWeightMetric().WithLabelValues("podinfo", "default")) assert.Equal(t, float64(100), actualPrimaryWeight) assert.Equal(t, float64(0), actualCanaryWeight) + + failureCount := testutil.ToFloat64(mocks.ctrl.recorder.GetFailuresMetric().WithLabelValues("podinfo", "default", "canary", "completed")) + assert.Equal(t, float64(1), failureCount) + }) + + t.Run("skipped analysis with count metrics", func(t *testing.T) { + mocks := newDeploymentFixture(nil) + + mocks.ctrl.advanceCanary("podinfo", "default") + mocks.makePrimaryReady(t) + mocks.ctrl.advanceCanary("podinfo", "default") + + // Enable skip analysis + cd, err := mocks.flaggerClient.FlaggerV1beta1().Canaries("default").Get(context.TODO(), "podinfo", metav1.GetOptions{}) + require.NoError(t, err) + cd.Spec.SkipAnalysis = true + _, err = mocks.flaggerClient.FlaggerV1beta1().Canaries("default").Update(context.TODO(), cd, metav1.UpdateOptions{}) + require.NoError(t, err) + + // Trigger deployment update + dep2 := newDeploymentTestDeploymentV2() + _, err = mocks.kubeClient.AppsV1().Deployments("default").Update(context.TODO(), dep2, metav1.UpdateOptions{}) + require.NoError(t, err) + + // Skip analysis should succeed immediately + mocks.ctrl.advanceCanary("podinfo", "default") + mocks.makeCanaryReady(t) + mocks.ctrl.advanceCanary("podinfo", "default") + + successCount := testutil.ToFloat64(mocks.ctrl.recorder.GetSuccessesMetric().WithLabelValues("podinfo", "default", "canary", "skipped")) + assert.Equal(t, float64(1), successCount) }) } @@ -313,74 +354,3 @@ func TestController_AnalysisMetricsRecording(t *testing.T) { assert.NotNil(t, durationMetric) }) } - -func TestController_getDeploymentStrategy(t *testing.T) { - ctrl := newDeploymentFixture(nil).ctrl - - tests := []struct { - name string - analysis *flaggerv1.CanaryAnalysis - expected string - }{ - { - name: "canary strategy with maxWeight", - analysis: &flaggerv1.CanaryAnalysis{ - MaxWeight: 30, - StepWeight: 10, - }, - expected: metrics.CanaryStrategy, - }, - { - name: "canary strategy with stepWeights", - analysis: &flaggerv1.CanaryAnalysis{ - StepWeights: []int{10, 20, 30}, - }, - expected: metrics.CanaryStrategy, - }, - { - name: "blue_green strategy with iterations", - analysis: &flaggerv1.CanaryAnalysis{ - Iterations: 5, - }, - expected: metrics.BlueGreenStrategy, - }, - { - name: "ab_testing strategy with iterations and match", - analysis: &flaggerv1.CanaryAnalysis{ - Iterations: 10, - Match: []istiov1beta1.HTTPMatchRequest{ - { - Headers: map[string]istiov1alpha1.StringMatch{ - "x-canary": { - Exact: "insider", - }, - }, - }, - }, - }, - expected: metrics.ABTestingStrategy, - }, - { - name: "default to canary when analysis is nil", - analysis: nil, - expected: metrics.CanaryStrategy, - }, - { - name: "default to canary when analysis is empty", - analysis: &flaggerv1.CanaryAnalysis{}, - expected: metrics.CanaryStrategy, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - canary := &flaggerv1.Canary{ - Spec: flaggerv1.CanarySpec{ - Analysis: tt.analysis, - }, - } - result := ctrl.getDeploymentStrategy(canary) - assert.Equal(t, tt.expected, result) - }) - } -} diff --git a/pkg/metrics/recorder.go b/pkg/metrics/recorder.go index 2a76b0b0..85e4d6e0 100644 --- a/pkg/metrics/recorder.go +++ b/pkg/metrics/recorder.go @@ -27,8 +27,8 @@ import ( // Deployment strategies const ( CanaryStrategy = "canary" - BlueGreenStrategy = "blue_green" - ABTestingStrategy = "ab_testing" + BlueGreenStrategy = "blue-green" + ABTestingStrategy = "ab-testing" ) // Analysis status diff --git a/pkg/metrics/recorder_test.go b/pkg/metrics/recorder_test.go index 66756ff2..c98ff123 100644 --- a/pkg/metrics/recorder_test.go +++ b/pkg/metrics/recorder_test.go @@ -90,6 +90,26 @@ func TestRecorder_GetterMethodsWithData(t *testing.T) { expected: 99.5, checkValue: true, }, + { + name: "IncAndGetSuccesses", + setupFunc: func(r Recorder) { + r.IncSuccesses(CanaryMetricLabels{Name: "podinfo", Namespace: "default", DeploymentStrategy: "canary", AnalysisStatus: "completed"}) + }, + getterFunc: func(r Recorder) interface{} { return r.GetSuccessesMetric() }, + labels: []string{"podinfo", "default", "canary", "completed"}, + expected: 1.0, + checkValue: true, + }, + { + name: "IncAndGetFailures", + setupFunc: func(r Recorder) { + r.IncFailures(CanaryMetricLabels{Name: "podinfo", Namespace: "default", DeploymentStrategy: "canary", AnalysisStatus: "completed"}) + }, + getterFunc: func(r Recorder) interface{} { return r.GetFailuresMetric() }, + labels: []string{"podinfo", "default", "canary", "completed"}, + expected: 1.0, + checkValue: true, + }, } for _, tt := range tests { @@ -103,6 +123,9 @@ func TestRecorder_GetterMethodsWithData(t *testing.T) { if gaugeVec, ok := metric.(*prometheus.GaugeVec); ok { value := testutil.ToFloat64(gaugeVec.WithLabelValues(tt.labels...)) assert.Equal(t, tt.expected, value) + } else if counterVec, ok := metric.(*prometheus.CounterVec); ok { + value := testutil.ToFloat64(counterVec.WithLabelValues(tt.labels...)) + assert.Equal(t, tt.expected, value) } } })